# Unpack the run parameters into top-level variables so the rest of the
# script can reference them directly.
param.names = c('output.var', 'log.pred', 'norm.pred', 'eda',
                'algo.forward.caret', 'algo.backward.caret',
                'algo.stepwise.caret', 'algo.LASSO.caret', 'algo.LARS.caret')
for (param.name in param.names) {
  assign(param.name, params[[param.name]])
}
# Not driven by params: absolute-value transformation is disabled.
transform.abs = FALSE
# Echo the parameter list so the rendered report records the exact
# configuration used for this run.
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 9
## $ output.var : chr "y3"
## $ log.pred : logi FALSE
## $ norm.pred : logi FALSE
## $ eda : logi FALSE
## $ algo.forward.caret : logi TRUE
## $ algo.backward.caret: logi TRUE
## $ algo.stepwise.caret: logi TRUE
## $ algo.LASSO.caret : logi TRUE
## $ algo.LARS.caret : logi TRUE
# Setup Labels
# Name of the (possibly transformed) target column: when log.pred is on, the
# model is trained on a '<output.var>.log' column, otherwise on the raw output.
# FIX: the original `... else output.var.tr = output.var` is a parse error --
# an `=` assignment is not a valid `else` branch; the outer `=` already
# captures the value of the if() expression.
output.var.tr = if (log.pred == TRUE) paste0(output.var,'.log') else output.var
# Alternative transformations kept for reference:
# output.var.tr = if (log.pred == TRUE) paste0(output.var,'.cuberoot') else output.var
# output.var.tr = if (norm.pred == TRUE) paste0(output.var,'.bestnorm') else output.var
# Load features/labels, join on JobName, and keep complete cases only.
feat = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
predictors = names(dplyr::select(feat,-JobName))
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')
cc = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]  # rows dropped because of missing values
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data,c('JobName',output.var)))
## JobName y3
## Job_00001: 1 Min. : 95.91
## Job_00002: 1 1st Qu.:118.29
## Job_00003: 1 Median :124.03
## Job_00004: 1 Mean :125.40
## Job_00007: 1 3rd Qu.:131.06
## Job_00008: 1 Max. :193.73
## (Other) :6974
The output variable y3 shows right skewness, so we will proceed with a log transformation.
# Histogram + density and QQ plot of the raw output variable.
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density()
#stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
ggplot(gather(select_at(data,output.var)), aes(sample=value)) +
stat_qq() +
facet_wrap(~key, scales = 'free',ncol=4)
# Create the transformed target column. A braced if/else replaces the
# original single-line form, whose commented-out alternative was spliced
# between `else` and its branch (fragile to edit).
if (log.pred == TRUE) {
  # log10 transform to correct the right skew of the output variable
  data[[output.var.tr]] = log(data[[output.var]],10)
  # cube-root alternative: data[[output.var.tr]] = (data[[output.var]])^(1/3)
} else {
  data[[output.var.tr]] = data[[output.var]]
}
# Compare raw vs transformed target: histograms + density, then QQ plots.
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=2)
ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) +
stat_qq() +
facet_wrap(~key, scales = 'free',ncol=4)
Normalization of y3 using the bestNormalize package (suggested: orderNorm). This is interesting, but I think it goes beyond the objective of the project.
# Optional bestNormalize (orderNorm/ORQ) normalization of the target.
if (norm.pred == TRUE){
t=bestNormalize::bestNormalize(data[[output.var]])
# FIX: print() is required -- inside a braced block a bare `t` is not
# auto-printed, so the chosen transformation was never reported
print(t)
qqnorm(data[[output.var]])
qqnorm(predict(t))
data[[output.var.tr]] = predict(t)
}
orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a normal distribution.
# Derived features, built programmatically instead of one assignment per
# column. The vectors below preserve the exact column-creation order of the
# original script so the resulting data.frame layout is unchanged.

# Pairwise ratios: each entry is c(numerator, denominator).
ratio.pairs = list(c('x2','x1'), c('x6','x5'), c('x9','x7'), c('x10','x8'),
                   c('x14','x12'), c('x15','x13'), c('x17','x16'),
                   c('x19','x18'), c('x21','x20'), c('x23','x22'))
for (pair in ratio.pairs) {
  data[[paste0(pair[1], 'by', pair[2])]] = data[[pair[1]]] / data[[pair[2]]]
}

# Natural-log transforms (order matches the original column order).
log.vars = c('x1','x2','x5','x6','x7','x9','x8','x10','x12','x14','x13',
             'x15','x16','x17','x18','x19','x20','x21','x22','x23','x11')
for (v in log.vars) {
  data[[paste0(v, 'log')]] = log(data[[v]])
}

# Inverse-square transforms.
sqinv.vars = c('x1','x5','x7','x8','x12','x13','x16','x18','x20','x22')
for (v in sqinv.vars) {
  data[[paste0(v, 'sqinv')]] = 1 / data[[v]]^2
}
# Original predictor list (before the derived features are added).
predictors
## [1] "x1" "x2" "x3" "x4" "x5" "x6" "x7" "x8" "x9" "x10" "x11"
## [12] "x12" "x13" "x14" "x15" "x16" "x17" "x18" "x19" "x20" "x21" "x22"
## [23] "x23" "stat1" "stat2" "stat3" "stat4" "stat5" "stat6" "stat7" "stat8" "stat9" "stat10"
## [34] "stat11" "stat12" "stat13" "stat14" "stat15" "stat16" "stat17" "stat18" "stat19" "stat20" "stat21"
## [45] "stat22" "stat23" "stat24" "stat25" "stat26" "stat27" "stat28" "stat29" "stat30" "stat31" "stat32"
## [56] "stat33" "stat34" "stat35" "stat36" "stat37" "stat38" "stat39" "stat40" "stat41" "stat42" "stat43"
## [67] "stat44" "stat45" "stat46" "stat47" "stat48" "stat49" "stat50" "stat51" "stat52" "stat53" "stat54"
## [78] "stat55" "stat56" "stat57" "stat58" "stat59" "stat60" "stat61" "stat62" "stat63" "stat64" "stat65"
## [89] "stat66" "stat67" "stat68" "stat69" "stat70" "stat71" "stat72" "stat73" "stat74" "stat75" "stat76"
## [100] "stat77" "stat78" "stat79" "stat80" "stat81" "stat82" "stat83" "stat84" "stat85" "stat86" "stat87"
## [111] "stat88" "stat89" "stat90" "stat91" "stat92" "stat93" "stat94" "stat95" "stat96" "stat97" "stat98"
## [122] "stat99" "stat100" "stat101" "stat102" "stat103" "stat104" "stat105" "stat106" "stat107" "stat108" "stat109"
## [133] "stat110" "stat111" "stat112" "stat113" "stat114" "stat115" "stat116" "stat117" "stat118" "stat119" "stat120"
## [144] "stat121" "stat122" "stat123" "stat124" "stat125" "stat126" "stat127" "stat128" "stat129" "stat130" "stat131"
## [155] "stat132" "stat133" "stat134" "stat135" "stat136" "stat137" "stat138" "stat139" "stat140" "stat141" "stat142"
## [166] "stat143" "stat144" "stat145" "stat146" "stat147" "stat148" "stat149" "stat150" "stat151" "stat152" "stat153"
## [177] "stat154" "stat155" "stat156" "stat157" "stat158" "stat159" "stat160" "stat161" "stat162" "stat163" "stat164"
## [188] "stat165" "stat166" "stat167" "stat168" "stat169" "stat170" "stat171" "stat172" "stat173" "stat174" "stat175"
## [199] "stat176" "stat177" "stat178" "stat179" "stat180" "stat181" "stat182" "stat183" "stat184" "stat185" "stat186"
## [210] "stat187" "stat188" "stat189" "stat190" "stat191" "stat192" "stat193" "stat194" "stat195" "stat196" "stat197"
## [221] "stat198" "stat199" "stat200" "stat201" "stat202" "stat203" "stat204" "stat205" "stat206" "stat207" "stat208"
## [232] "stat209" "stat210" "stat211" "stat212" "stat213" "stat214" "stat215" "stat216" "stat217"
# Rebuild the predictor list; the "^x" pattern intentionally also picks up
# the derived ratio/log/sqinv columns created above.
controlled.vars = colnames(data)[grep("^x",colnames(data))]
stat.vars = colnames(data)[grep("^stat",colnames(data))]
predictors = c(controlled.vars,stat.vars)
predictors
## [1] "x1" "x2" "x3" "x4" "x5" "x6" "x7" "x8" "x9" "x10"
## [11] "x11" "x12" "x13" "x14" "x15" "x16" "x17" "x18" "x19" "x20"
## [21] "x21" "x22" "x23" "x2byx1" "x6byx5" "x9byx7" "x10byx8" "x14byx12" "x15byx13" "x17byx16"
## [31] "x19byx18" "x21byx20" "x23byx22" "x1log" "x2log" "x5log" "x6log" "x7log" "x9log" "x8log"
## [41] "x10log" "x12log" "x14log" "x13log" "x15log" "x16log" "x17log" "x18log" "x19log" "x20log"
## [51] "x21log" "x22log" "x23log" "x11log" "x1sqinv" "x5sqinv" "x7sqinv" "x8sqinv" "x12sqinv" "x13sqinv"
## [61] "x16sqinv" "x18sqinv" "x20sqinv" "x22sqinv" "stat1" "stat2" "stat3" "stat4" "stat5" "stat6"
## [71] "stat7" "stat8" "stat9" "stat10" "stat11" "stat12" "stat13" "stat14" "stat15" "stat16"
## [81] "stat17" "stat18" "stat19" "stat20" "stat21" "stat22" "stat23" "stat24" "stat25" "stat26"
## [91] "stat27" "stat28" "stat29" "stat30" "stat31" "stat32" "stat33" "stat34" "stat35" "stat36"
## [101] "stat37" "stat38" "stat39" "stat40" "stat41" "stat42" "stat43" "stat44" "stat45" "stat46"
## [111] "stat47" "stat48" "stat49" "stat50" "stat51" "stat52" "stat53" "stat54" "stat55" "stat56"
## [121] "stat57" "stat58" "stat59" "stat60" "stat61" "stat62" "stat63" "stat64" "stat65" "stat66"
## [131] "stat67" "stat68" "stat69" "stat70" "stat71" "stat72" "stat73" "stat74" "stat75" "stat76"
## [141] "stat77" "stat78" "stat79" "stat80" "stat81" "stat82" "stat83" "stat84" "stat85" "stat86"
## [151] "stat87" "stat88" "stat89" "stat90" "stat91" "stat92" "stat93" "stat94" "stat95" "stat96"
## [161] "stat97" "stat98" "stat99" "stat100" "stat101" "stat102" "stat103" "stat104" "stat105" "stat106"
## [171] "stat107" "stat108" "stat109" "stat110" "stat111" "stat112" "stat113" "stat114" "stat115" "stat116"
## [181] "stat117" "stat118" "stat119" "stat120" "stat121" "stat122" "stat123" "stat124" "stat125" "stat126"
## [191] "stat127" "stat128" "stat129" "stat130" "stat131" "stat132" "stat133" "stat134" "stat135" "stat136"
## [201] "stat137" "stat138" "stat139" "stat140" "stat141" "stat142" "stat143" "stat144" "stat145" "stat146"
## [211] "stat147" "stat148" "stat149" "stat150" "stat151" "stat152" "stat153" "stat154" "stat155" "stat156"
## [221] "stat157" "stat158" "stat159" "stat160" "stat161" "stat162" "stat163" "stat164" "stat165" "stat166"
## [231] "stat167" "stat168" "stat169" "stat170" "stat171" "stat172" "stat173" "stat174" "stat175" "stat176"
## [241] "stat177" "stat178" "stat179" "stat180" "stat181" "stat182" "stat183" "stat184" "stat185" "stat186"
## [251] "stat187" "stat188" "stat189" "stat190" "stat191" "stat192" "stat193" "stat194" "stat195" "stat196"
## [261] "stat197" "stat198" "stat199" "stat200" "stat201" "stat202" "stat203" "stat204" "stat205" "stat206"
## [271] "stat207" "stat208" "stat209" "stat210" "stat211" "stat212" "stat213" "stat214" "stat215" "stat216"
## [281] "stat217"
All predictors show a fat-tailed distribution, where the two tails are very tall and there is low density around the mean. The orderNorm transformation can help (see the [Best Normalizator] section).
Histograms
# EDA: histograms + density for a hand-picked sample of predictors, then a
# numeric summary of each.
if (eda == TRUE){
cols = c('x11','x18','stat98','x7','stat110')
df=gather(select_at(data,cols))
# FIX: print() added -- a ggplot that is not the last expression of a
# braced block is never auto-displayed, so this plot was silently skipped
print(ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=3))
# ggplot(gather(select_at(data,cols)), aes(sample=value)) +
# stat_qq()+
# facet_wrap(~key, scales = 'free',ncol=2)
# last expression of the block: auto-prints at top level
lapply(select_at(data,cols),summary)
}
Scatter plots vs. the output variable **y3**
# EDA: scatter plots of the sampled predictors (`cols`) vs the transformed
# target. The ggplot is the block's last expression, so it auto-prints.
if (eda == TRUE){
d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light green',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=3)
}
All predictors show strong indications of fat tails.
# EDA: histograms + density for ALL predictors (large facet grid). The
# ggplot is the block's last expression, so it auto-prints.
if (eda == TRUE){
df=gather(select_at(data,predictors))
ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=4)
}
# EDA: correlation of every predictor with the (transformed) target; top 20
# positive and top 20 negative correlations.
if (eda == TRUE){
#chart.Correlation(select(data,-JobName), pch=21)
# https://stackoverflow.com/questions/27034655/how-to-use-dplyrarrangedesc-when-using-a-string-as-column-name
t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
,select_at(data,output.var.tr)),4)) %>%
rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-!!sym(output.var.tr))
#DT::datatable(t)
message("Top Positive")
# FIX: print() added -- a kable() that is not the last expression of the
# braced block is never auto-displayed, so "Top Positive" had no table
print(kable(head(arrange(t,desc(!!sym(output.var.tr))),20)))
message("Top Negative")
print(kable(head(arrange(t,!!sym(output.var.tr)),20)))
}
# EDA: full predictor correlation matrix; only a 10x10 corner is displayed.
# kable() is the block's last expression, so it auto-prints.
if (eda == TRUE){
#chart.Correlation(select(data,-JobName), pch=21)
t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
#DT::datatable(t,options=list(scrollX=T))
message("Showing only 10 variables")
kable(t[1:10,1:10])
}
Scatter plots with all predictors and the output variable (y3)
# EDA: scatter plots of ALL predictors vs the transformed target. The
# ggplot is the block's last expression, so it auto-prints.
if (eda == TRUE){
d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light blue',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=4)
}
No multicollinearity among predictors.
Showing the top predictors by VIF value.
# Variance inflation factors; high VIF flags multicollinearity. head() is
# the block's last expression, so its value auto-prints.
if (eda == TRUE){
vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
head(vifDF,75)
}
# Try a sqrt transform of x18 and inspect both versions against the target.
data.tr=data %>%
mutate(x18.sqrt = sqrt(x18))
cols=c('x18','x18.sqrt')
# ggplot(gather(select_at(data.tr,cols)), aes(value)) +
# geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
# geom_density() +
# facet_wrap(~key, scales = 'free',ncol=4)
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light blue',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# removing unwanted variables: drop the JobName identifier before modeling
data.tr=data.tr %>%
#dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])
dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('JobName')])
data=data.tr
label.names=output.var.tr
# Interaction handling for the PCA design matrix:
# 0 for no interaction,
# 1 for Full 2 way interaction and
# 2 for Selective 2 way interaction
# 3 for Selective 3 way interaction
InteractionMode = 2
# All non-label columns feed the PCA.
pca.vars = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]
# http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
# Run PCA on the predictors, optionally expanding interaction terms first
# according to InteractionMode.
# FIX: floor()/max() so makeCluster always receives a positive whole number
# (detectCores()*0.75 can be fractional, e.g. 4.5 on a 6-core machine).
cl <- makeCluster(max(1, floor(detectCores()*0.75))) # use 75% of cores only, leave rest for other tasks
registerDoParallel(cl)
if(InteractionMode == 1){
# full 2-way interactions among ALL pca variables
pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
#saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
# no interactions: plain PCA on the raw variables
pca.model = prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
if (InteractionMode >= 2 & InteractionMode <= 3){
# selective modes: interactions only among the controlled x-variables;
# the stat variables enter without interactions
controlled.vars = pca.vars[grep("^x",pca.vars)]
stat.vars = pca.vars[grep("^stat",pca.vars)]
if (InteractionMode >= 2){
interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^2')
}
if (InteractionMode >= 3){
# mode 3 overrides the 2-way form with 3-way interactions
interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^3')
}
no.interact.form = paste0(stat.vars, collapse ='+')
pca.formula = as.formula(paste(interaction.form, no.interact.form, sep = "+"))
pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
}
stopCluster(cl)
registerDoSEQ() # register sequential engine in case you are not using this function anymore
# Decide how many principal components to keep: the components whose
# cumulative share of variance stays within targetCumVar.
targetCumVar = .9
pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var) # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar )
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar # logical mask of retained PCs
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 164 PCAs justify 90.0% of the total Variance. (90.0%)
# NOTE(review): this first plot draws the eigenvalues ($var) but labels them
# as "Proportion of variance explained" -- verify whether $pvar was intended
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained", type='b')
plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')
screeplot(pca.model,npcs = pca.model$pcaSelCount)
screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')
#summary(pca.model)
#pca.model$rotation
# Build the modeling dataset: transformed label + the retained PC scores.
data.pca = dplyr::select(data,!!label.names) %>%
dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
,!!colnames(pca.model$rotation)[pca.model$pcaSel])
)
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
# 80/20 train/test split on the label (caTools::sample.split)
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)
data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)
plot.diagnostics <- function(model, train) {
  # Produce the standard regression diagnostics for a fitted lm-style model:
  # the base plot() diagnostics, studentized and standardized residual plots,
  # a residual histogram, and leverage / Cook's distance plots.
  #
  # model : fitted model (must support resid/rstandard/rstudent/predict)
  # train : data used to fit the model (for predicted values and the
  #         n used in the Cook's distance thresholds)
  # Returns the vector of Cook's distances.
  plot(model)
  r.standard = rstandard(model)
  r.student = rstudent(model)
  # Studentized residuals vs fitted values
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_hline(yintercept = 0,size=1)+
  ylab("Student Residuals") +
  xlab("Predicted Values")+
  ggtitle("Student Residual Plot")
  plot(p)
  # Standardized residuals vs fitted values, with +/-2 reference bands.
  # FIX: this plot draws rstandard() but was labeled "Student Residuals".
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_hline(yintercept = c(-2,0,2),size=1)+
  ylab("Standardized Residuals") +
  xlab("Predicted Values")+
  ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals against the standard normal density
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
  geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) +
  stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
  ylab("Density")+
  xlab("Studentized Residuals")+
  ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures (summary not printed: too much data)
  inf.meas = influence.measures(model)
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  ylab('Leverage') +
  xlab('Index')
  plot(p)
  # Cook's Distance. Points above 15/n are labeled -- stricter than the 4/n
  # reference line so only the worst offenders get annotated (presumably
  # intentional; confirm the 15/n threshold).
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
  ylab('Cooks distances') +
  geom_hline(yintercept = c(4/nrow(train),0),size=1)+
  xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = ""))
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = ""))
  return(cd)
}
# function to set up random seeds for caret::trainControl
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # method  : resampling method ("cv" or "repeatedcv"; anything else -> NULL)
  # numbers : number of folds
  # repeats : number of repeats (used by "repeatedcv" only)
  # tunes   : tuning-grid length, if any
  # seed    : master seed used to generate the seed list
  # Returns a list of length B+1: B integer vectors of length
  # numbers + tunes, plus a final single seed for the last model fit;
  # NULL for unsupported methods.

  # B is the number of resamples
  B <- if (method == "cv") numbers
  else if (method == "repeatedcv") numbers * repeats
  else NULL
  # FIX: the original tested is.null(length) -- `length` is the base function
  # and is never NULL, so unsupported methods crashed in vector() instead of
  # returning NULL. Test B instead.
  if (is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}
train.caret.glmselect = function(formula, data, method
          ,subopt = NULL, feature.names
          , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  # Train a model-selection regression through caret and print/plot its
  # cross-validation diagnostics.
  #
  # formula       : full model formula (the selection methods pick a subset)
  # data          : training data
  # method        : 'leapForward' | 'leapBackward' | 'leapSeq' | 'glmnet' | 'lars'
  # subopt        : qualifier for `method`; only 'LASSO' (with glmnet) is used
  # feature.names : predictor names; sizes the nvmax grid for the leap methods
  # train.control, tune.grid, pre.proc : optional overrides for caret::train
  #
  # Returns a method-dependent list; every supported method now includes
  # model, id, residPlot, residHistogram and metricsPlot (the leap methods
  # additionally return modelLM, an lm refit of the selected subset).

  # Classify the method once. FIX: guarding subopt with is.null() avoids the
  # zero-length comparison `NULL == 'LASSO'` of the original code.
  is.leap  = method %in% c('leapForward','leapBackward','leapSeq')
  is.lasso = method == 'glmnet' && !is.null(subopt) && subopt == 'LASSO'
  is.lars  = method == 'lars'

  if(is.null(train.control)){
    # 10-fold CV with pre-generated seeds for reproducibility
    train.control <- trainControl(method = "cv"
              ,number = 10
              ,seeds = setCaretSeeds(method = "cv"
                      , numbers = 10
                      , seed = 1701)
              ,search = "grid"
              ,verboseIter = TRUE
              ,allowParallel = TRUE
              )
  }
  if(is.null(tune.grid)){
    if (is.leap){
      # try every subset size
      tune.grid = data.frame(nvmax = 1:length(feature.names))
    }
    if (is.lasso){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)  # alpha = 1 selects the LASSO penalty in glmnet
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (is.lars){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale")
    }
  }
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # use ~75% of cores, leave rest for other tasks.
  # FIX: floor()/max() so makeCluster always gets a positive whole number.
  cl <- makeCluster(max(1, floor(detectCores()*0.75)))
  registerDoParallel(cl)
  # note that the seed also has to be set just before this function is called
  # to ensure reproducibility
  set.seed(1)
  model.caret <- caret::train(formula
            , data = data
            , method = method
            , tuneGrid = tune.grid
            , trControl = train.control
            , preProc = pre.proc
            )
  stopCluster(cl)
  registerDoSEQ() # register sequential engine in case you are not using this function anymore

  if (is.leap){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel
    # CV metrics vs subset size
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot (leaps does not support studentized residuals)
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # FIX: '+' added before theme_light(); in the original it was a detached
    # statement and the theme was silently dropped
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    id = rownames(model.caret$bestTune)
    # regsubsets does not return a full model (see its documentation), so we
    # refit an lm on the selected subset to get coefficient intervals
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <- as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model, id = id, residPlot = residPlot
                , residHistogram = residHistogram, metricsPlot = metricsPlot
                , modelLM = mod))
  }
  if (is.lasso){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    print(model.caret$results)
    model=model.caret$finalModel
    # CV metrics vs lambda
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # FIX: '+' added before theme_light() (same detached-statement bug)
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    print("Coefficients")
    # no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]  # keep only the non-zero coefficients
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret, id = id, residPlot = residPlot
                , residHistogram = residHistogram, metricsPlot = metricsPlot))
  }
  if (is.lars){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    # CV metrics vs fraction
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # FIX: '+' added before theme_light() (same detached-statement bug)
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    print("Coefficients")
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret, id = id, residPlot = residPlot
                , residHistogram = residHistogram, metricsPlot = metricsPlot))
  }
}
# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returning the value in formula
predict.regsubsets <- function(object, newdata, id, formula, ...) {
# Predict from a leaps::regsubsets fit: rebuild the design matrix from the
# explicitly-passed `formula` (see note above) and multiply by the
# coefficients of the `id`-th subset model.
#form <- as.formula(object$call[[2]])
mat <- model.matrix(formula, newdata) # adds intercept and expands any interaction terms
coefi <- coef(object, id = id)
xvars <- names(coefi)
return(mat[,xvars]%*%coefi)
}
test.model = function(model, test, level=0.95
          ,draw.limits = FALSE, good = 0.1, ok = 0.15
          ,method = NULL, subopt = NULL
          ,id = NULL, formula, feature.names, label.names
          ,transformation = NULL){
  # Score `model` on the held-out `test` set, report MSE/RMSE on the
  # modeling scale and on the original scale, and return an
  # actual-vs-predicted scatter plot with +/-good and +/-ok tolerance lines.
  #
  # If using caret for glm-select equivalent functionality, pass the full
  # formula (the subset of variables is selected from there).
  # NOTE: reads the globals log.pred / norm.pred to decide on the
  # back-transformation; `transformation` is the bestNormalize object used
  # when norm.pred is TRUE.

  # FIX: chained with else-if. In the original, a NULL `method` fell through
  # to `method == 'leapForward'`, a zero-length condition that errors; the
  # NULL subopt is also guarded now.
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level)
  } else if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && !is.null(subopt) && subopt == 'LASSO'){
    xtest = as.matrix(test[,feature.names])
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  }
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))
  # error on the (possibly transformed) modeling scale
  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE:", test.rmse, sep=" "))
  if(log.pred == TRUE || norm.pred == TRUE){
    # actual vs predicted on the transformed scale.
    # FIX: print() added -- a ggplot inside an if-block of a function is
    # never displayed otherwise, so this plot was silently skipped
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(ggplot(df,aes(x=x,y=y)) +
      geom_point(color='blue',alpha=0.5,shape=20,size=2) +
      geom_abline(slope=1,intercept=0,color='black',size=1) +
      #scale_y_continuous(limits=c(min(df),max(df)))+
      xlab("Actual (Transformed)")+
      ylab("Predicted (Transformed)"))
  }
  # back-transform actual/predicted to the original scale
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]
    y = 10^pred[,1]
    # x = (test[,label.names])^3
    # y = (pred[,1])^3
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }
  test.mse = mean((x-y)^2)
  print (paste(method, subopt, "Test MSE (Org Scale):", test.mse, sep=" "))
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE (Org Scale):", test.rmse, sep=" "))
  # returned value (auto-printed when called at top level): actual vs
  # predicted with +/-good (green) and +/-ok (red) tolerance lines
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
        ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted")
}
# Build the regression formulas from data.train's column names:
# columns listed in label.names form the response; all other columns
# (the principal components) are the predictors.
n <- names(data.train)
lhs.terms <- paste(n[n %in% label.names], collapse = " + ")
rhs.terms <- paste(n[!n %in% label.names], collapse = " + ")
# Full model: response against every predictor.
formula <- as.formula(paste(lhs.terms, "~", rhs.terms))
# Grand-mean (intercept-only) model, used as the null baseline.
grand.mean.formula = as.formula(paste(lhs.terms, "~ 1"))
print(formula)
## y3 ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 +
## PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + PC19 +
## PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 + PC28 +
## PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 + PC37 +
## PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 + PC46 +
## PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 + PC55 +
## PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 + PC64 +
## PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 + PC73 +
## PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 + PC82 +
## PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 + PC91 +
## PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 + PC100 +
## PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 + PC108 +
## PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 + PC116 +
## PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 + PC124 +
## PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 + PC132 +
## PC133 + PC134 + PC135 + PC136 + PC137 + PC138 + PC139 + PC140 +
## PC141 + PC142 + PC143 + PC144 + PC145 + PC146 + PC147 + PC148 +
## PC149 + PC150 + PC151 + PC152 + PC153 + PC154 + PC155 + PC156 +
## PC157 + PC158 + PC159 + PC160 + PC161 + PC162 + PC163 + PC164
# Show the intercept-only (null) formula for the record.
print(grand.mean.formula)
## y3 ~ 1
# Update feature.names because we may have transformed some features
# (downstream selection/prediction helpers read this vector).
feature.names = n[!n %in% label.names]
# Fit the full OLS model: response ~ all predictors (positional second
# argument of lm() is `data`).
model.full = lm(formula , data.train)
summary(model.full)
##
## Call:
## lm(formula = formula, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.559 -6.528 -1.910 4.587 58.176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.552547 0.127282 986.415 < 2e-16 ***
## PC1 -0.130695 0.011101 -11.773 < 2e-16 ***
## PC2 -0.260821 0.011152 -23.387 < 2e-16 ***
## PC3 -0.124379 0.011250 -11.056 < 2e-16 ***
## PC4 -0.096620 0.011449 -8.439 < 2e-16 ***
## PC5 0.051682 0.011796 4.381 1.20e-05 ***
## PC6 -0.032606 0.011811 -2.761 0.005789 **
## PC7 -0.049668 0.012120 -4.098 4.23e-05 ***
## PC8 -0.012792 0.012292 -1.041 0.298055
## PC9 -0.013346 0.012711 -1.050 0.293785
## PC10 -0.006503 0.012877 -0.505 0.613595
## PC11 -0.146129 0.013684 -10.679 < 2e-16 ***
## PC12 -0.148741 0.014590 -10.195 < 2e-16 ***
## PC13 0.088756 0.014742 6.021 1.85e-09 ***
## PC14 0.078830 0.015268 5.163 2.52e-07 ***
## PC15 -0.006925 0.015443 -0.448 0.653847
## PC16 0.107353 0.015781 6.802 1.14e-11 ***
## PC17 -0.066451 0.016535 -4.019 5.93e-05 ***
## PC18 -0.111549 0.017366 -6.423 1.45e-10 ***
## PC19 0.013614 0.017579 0.774 0.438712
## PC20 0.123055 0.019113 6.438 1.31e-10 ***
## PC21 0.024074 0.019904 1.210 0.226522
## PC22 0.037252 0.031219 1.193 0.232828
## PC23 0.070534 0.038414 1.836 0.066393 .
## PC24 -0.214852 0.045052 -4.769 1.90e-06 ***
## PC25 0.031963 0.050488 0.633 0.526715
## PC26 0.100858 0.051897 1.943 0.052016 .
## PC27 0.095112 0.052006 1.829 0.067477 .
## PC28 0.043043 0.052392 0.822 0.411363
## PC29 0.151608 0.057843 2.621 0.008791 **
## PC30 0.020218 0.058540 0.345 0.729832
## PC31 -0.061870 0.062976 -0.982 0.325927
## PC32 -0.225948 0.063700 -3.547 0.000393 ***
## PC33 0.074412 0.065336 1.139 0.254790
## PC34 0.349027 0.068708 5.080 3.90e-07 ***
## PC35 -0.001661 0.073756 -0.023 0.982031
## PC36 -0.013098 0.074961 -0.175 0.861300
## PC37 -0.122004 0.077447 -1.575 0.115243
## PC38 -0.006577 0.080756 -0.081 0.935089
## PC39 -0.052624 0.081769 -0.644 0.519886
## PC40 -0.109715 0.082730 -1.326 0.184836
## PC41 0.026978 0.084442 0.319 0.749371
## PC42 -0.056771 0.084511 -0.672 0.501768
## PC43 0.003741 0.085730 0.044 0.965199
## PC44 0.029723 0.086155 0.345 0.730110
## PC45 -0.016850 0.086190 -0.196 0.845006
## PC46 0.123263 0.087149 1.414 0.157300
## PC47 -0.128422 0.087421 -1.469 0.141890
## PC48 0.025183 0.088223 0.285 0.775313
## PC49 -0.023187 0.089192 -0.260 0.794897
## PC50 -0.093576 0.090436 -1.035 0.300841
## PC51 0.022880 0.092131 0.248 0.803881
## PC52 -0.023793 0.092430 -0.257 0.796870
## PC53 0.053486 0.090206 0.593 0.553254
## PC54 -0.047300 0.091585 -0.516 0.605552
## PC55 0.027094 0.092434 0.293 0.769447
## PC56 -0.002177 0.093172 -0.023 0.981360
## PC57 -0.158245 0.094245 -1.679 0.093195 .
## PC58 -0.027666 0.094282 -0.293 0.769203
## PC59 0.249173 0.093780 2.657 0.007907 **
## PC60 -0.107319 0.093769 -1.145 0.252461
## PC61 0.103938 0.095152 1.092 0.274735
## PC62 -0.122479 0.095046 -1.289 0.197583
## PC63 -0.108455 0.095320 -1.138 0.255252
## PC64 -0.233860 0.096113 -2.433 0.014999 *
## PC65 -0.041726 0.096420 -0.433 0.665214
## PC66 -0.140816 0.097319 -1.447 0.147968
## PC67 -0.036982 0.097679 -0.379 0.704995
## PC68 0.272055 0.099050 2.747 0.006041 **
## PC69 0.115549 0.099189 1.165 0.244096
## PC70 -0.019454 0.098789 -0.197 0.843892
## PC71 0.256983 0.098973 2.596 0.009444 **
## PC72 -0.008482 0.100528 -0.084 0.932765
## PC73 0.060640 0.100446 0.604 0.546065
## PC74 -0.106636 0.100356 -1.063 0.288020
## PC75 -0.201567 0.100584 -2.004 0.045124 *
## PC76 0.018524 0.101266 0.183 0.854862
## PC77 0.162363 0.101896 1.593 0.111125
## PC78 0.057342 0.102247 0.561 0.574943
## PC79 0.118704 0.103255 1.150 0.250352
## PC80 -0.089778 0.102873 -0.873 0.382864
## PC81 0.214793 0.104055 2.064 0.039044 *
## PC82 0.109176 0.104576 1.044 0.296536
## PC83 -0.249264 0.104612 -2.383 0.017218 *
## PC84 0.218837 0.104554 2.093 0.036391 *
## PC85 0.352160 0.105441 3.340 0.000844 ***
## PC86 -0.083950 0.105906 -0.793 0.427995
## PC87 0.454672 0.106814 4.257 2.11e-05 ***
## PC88 -0.208224 0.107420 -1.938 0.052625 .
## PC89 -0.198925 0.107888 -1.844 0.065265 .
## PC90 -0.188013 0.106515 -1.765 0.077597 .
## PC91 0.069569 0.107226 0.649 0.516488
## PC92 0.043564 0.108521 0.401 0.688120
## PC93 -0.014071 0.108731 -0.129 0.897035
## PC94 -0.219505 0.109094 -2.012 0.044262 *
## PC95 0.008199 0.108770 0.075 0.939916
## PC96 -0.213668 0.110188 -1.939 0.052538 .
## PC97 -0.137448 0.109913 -1.251 0.211165
## PC98 -0.088292 0.109431 -0.807 0.419805
## PC99 -0.134524 0.109136 -1.233 0.217769
## PC100 0.007040 0.110538 0.064 0.949221
## PC101 -0.119594 0.109832 -1.089 0.276257
## PC102 -0.219019 0.110680 -1.979 0.047884 *
## PC103 0.114483 0.111035 1.031 0.302558
## PC104 -0.159338 0.111205 -1.433 0.151963
## PC105 0.174623 0.110913 1.574 0.115451
## PC106 0.292742 0.111354 2.629 0.008589 **
## PC107 -0.050182 0.111351 -0.451 0.652252
## PC108 0.183346 0.111864 1.639 0.101270
## PC109 -0.037107 0.112625 -0.329 0.741809
## PC110 -0.064925 0.112004 -0.580 0.562162
## PC111 -0.176545 0.112333 -1.572 0.116098
## PC112 -0.015230 0.112019 -0.136 0.891856
## PC113 0.101870 0.112348 0.907 0.364586
## PC114 -0.149983 0.112638 -1.332 0.183062
## PC115 -0.411842 0.112941 -3.647 0.000268 ***
## PC116 -0.056480 0.112580 -0.502 0.615904
## PC117 -0.022949 0.112846 -0.203 0.838860
## PC118 0.161193 0.112775 1.429 0.152966
## PC119 -0.233878 0.113487 -2.061 0.039366 *
## PC120 0.069873 0.113676 0.615 0.538797
## PC121 -0.110230 0.113963 -0.967 0.333467
## PC122 0.163345 0.115118 1.419 0.155977
## PC123 -0.244226 0.113240 -2.157 0.031072 *
## PC124 0.144407 0.113631 1.271 0.203838
## PC125 0.111729 0.114700 0.974 0.330049
## PC126 0.126903 0.114953 1.104 0.269661
## PC127 0.062068 0.114381 0.543 0.587399
## PC128 -0.160221 0.115043 -1.393 0.163768
## PC129 -0.087781 0.115363 -0.761 0.446743
## PC130 0.095916 0.115129 0.833 0.404811
## PC131 -0.304885 0.115152 -2.648 0.008128 **
## PC132 0.069923 0.114831 0.609 0.542600
## PC133 0.050821 0.114804 0.443 0.658019
## PC134 0.235247 0.116936 2.012 0.044294 *
## PC135 0.201328 0.115932 1.737 0.082514 .
## PC136 0.105721 0.115691 0.914 0.360850
## PC137 -0.129377 0.116676 -1.109 0.267541
## PC138 0.146849 0.115649 1.270 0.204214
## PC139 -0.214533 0.116354 -1.844 0.065267 .
## PC140 -0.054362 0.116504 -0.467 0.640796
## PC141 0.094664 0.116665 0.811 0.417161
## PC142 -0.079940 0.117418 -0.681 0.496014
## PC143 0.086901 0.117155 0.742 0.458261
## PC144 0.377176 0.117929 3.198 0.001390 **
## PC145 0.069323 0.117751 0.589 0.556073
## PC146 0.279495 0.118014 2.368 0.017904 *
## PC147 0.041163 0.117546 0.350 0.726216
## PC148 -0.077193 0.118643 -0.651 0.515308
## PC149 0.043515 0.117895 0.369 0.712069
## PC150 0.077255 0.118249 0.653 0.513574
## PC151 0.156719 0.118263 1.325 0.185171
## PC152 -0.028463 0.117958 -0.241 0.809334
## PC153 0.148012 0.119046 1.243 0.213806
## PC154 -0.189009 0.119413 -1.583 0.113523
## PC155 0.208139 0.118623 1.755 0.079381 .
## PC156 0.253808 0.119154 2.130 0.033209 *
## PC157 0.018220 0.119371 0.153 0.878694
## PC158 -0.090117 0.118652 -0.760 0.447580
## PC159 0.460142 0.119420 3.853 0.000118 ***
## PC160 -0.024040 0.119045 -0.202 0.839971
## PC161 0.095285 0.118905 0.801 0.422965
## PC162 -0.400365 0.119876 -3.340 0.000844 ***
## PC163 0.312879 0.120148 2.604 0.009236 **
## PC164 0.073022 0.120239 0.607 0.543673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.488 on 5419 degrees of freedom
## Multiple R-squared: 0.2434, Adjusted R-squared: 0.2205
## F-statistic: 10.63 on 164 and 5419 DF, p-value: < 2.2e-16
# Influence diagnostics on the full model: plot.diagnostics() draws the
# diagnostic plots and returns the per-observation Cook's distances.
cd.full = plot.diagnostics(model=model.full, train=data.train)
## [1] "Number of data points that have Cook's D > 4/n: 266"
## [1] "Number of data points that have Cook's D > 1: 0"
# Row names of observations above the conventional 4/n Cook's D cutoff.
high.cd = names(cd.full[cd.full > 4/nrow(data.train)])
#save dataset with high.cd flagged
# NOTE(review): `t` masks base::t() from here on, and this flagged data
# frame appears to be passed later as `transformation = t` to test.model()
# -- confirm that is intended; a normalization/transformation object looks
# like what that argument expects when norm.pred is TRUE.
t = data.train %>%
rownames_to_column() %>%
mutate(high.cd = ifelse(rowname %in% high.cd,1,0))
#write.csv(t,file='data_high_cd_flag.csv',row.names = F)
###
# Drop the high-influence rows and refit the full model on the remainder.
data.train2 = data.train[!(rownames(data.train)) %in% high.cd,]
model.full2 = lm(formula , data.train2)
summary(model.full2)
##
## Call:
## lm(formula = formula, data = data.train2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.097 -5.541 -1.139 4.651 25.298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.244e+02 1.026e-01 1211.661 < 2e-16 ***
## PC1 -1.387e-01 9.135e-03 -15.185 < 2e-16 ***
## PC2 -2.528e-01 9.028e-03 -28.006 < 2e-16 ***
## PC3 -1.294e-01 9.122e-03 -14.183 < 2e-16 ***
## PC4 -1.084e-01 9.265e-03 -11.705 < 2e-16 ***
## PC5 4.798e-02 9.554e-03 5.022 5.30e-07 ***
## PC6 -1.901e-02 9.574e-03 -1.986 0.047121 *
## PC7 -5.146e-02 9.796e-03 -5.253 1.56e-07 ***
## PC8 -1.544e-02 9.978e-03 -1.548 0.121803
## PC9 3.474e-03 1.028e-02 0.338 0.735539
## PC10 1.969e-03 1.042e-02 0.189 0.850125
## PC11 -1.659e-01 1.104e-02 -15.026 < 2e-16 ***
## PC12 -1.532e-01 1.175e-02 -13.033 < 2e-16 ***
## PC13 8.593e-02 1.192e-02 7.208 6.49e-13 ***
## PC14 7.124e-02 1.230e-02 5.791 7.41e-09 ***
## PC15 -1.274e-02 1.250e-02 -1.020 0.307851
## PC16 9.144e-02 1.274e-02 7.178 8.10e-13 ***
## PC17 -6.861e-02 1.332e-02 -5.150 2.70e-07 ***
## PC18 -1.098e-01 1.398e-02 -7.860 4.66e-15 ***
## PC19 2.271e-02 1.421e-02 1.598 0.110079
## PC20 1.257e-01 1.543e-02 8.142 4.82e-16 ***
## PC21 2.261e-02 1.605e-02 1.409 0.158967
## PC22 6.784e-02 2.518e-02 2.695 0.007069 **
## PC23 7.010e-02 3.151e-02 2.225 0.026138 *
## PC24 -2.453e-01 3.655e-02 -6.711 2.15e-11 ***
## PC25 7.240e-02 4.102e-02 1.765 0.077614 .
## PC26 3.136e-02 4.210e-02 0.745 0.456394
## PC27 3.908e-02 4.222e-02 0.926 0.354654
## PC28 1.435e-02 4.251e-02 0.338 0.735715
## PC29 1.525e-01 4.670e-02 3.266 0.001097 **
## PC30 2.199e-02 4.770e-02 0.461 0.644878
## PC31 -5.876e-02 5.107e-02 -1.151 0.249986
## PC32 -1.903e-01 5.147e-02 -3.697 0.000220 ***
## PC33 -4.745e-03 5.336e-02 -0.089 0.929149
## PC34 3.395e-01 5.543e-02 6.125 9.75e-10 ***
## PC35 3.165e-02 6.021e-02 0.526 0.599208
## PC36 -3.541e-02 6.089e-02 -0.582 0.560833
## PC37 -1.356e-01 6.257e-02 -2.167 0.030289 *
## PC38 -4.588e-04 6.511e-02 -0.007 0.994378
## PC39 -3.762e-02 6.945e-02 -0.542 0.588071
## PC40 -1.299e-01 6.773e-02 -1.918 0.055170 .
## PC41 -5.088e-02 6.905e-02 -0.737 0.461240
## PC42 6.146e-02 6.933e-02 0.886 0.375395
## PC43 1.560e-01 7.018e-02 2.223 0.026291 *
## PC44 -3.346e-02 7.183e-02 -0.466 0.641382
## PC45 3.110e-02 7.017e-02 0.443 0.657595
## PC46 1.095e-01 7.095e-02 1.544 0.122717
## PC47 -1.575e-01 7.154e-02 -2.202 0.027712 *
## PC48 7.557e-02 7.201e-02 1.049 0.294045
## PC49 5.399e-02 7.307e-02 0.739 0.459995
## PC50 -1.241e-01 7.415e-02 -1.673 0.094345 .
## PC51 1.178e-01 7.581e-02 1.554 0.120358
## PC52 -3.642e-02 7.552e-02 -0.482 0.629676
## PC53 1.090e-01 7.357e-02 1.482 0.138433
## PC54 -8.821e-02 7.536e-02 -1.170 0.241877
## PC55 -6.475e-02 7.589e-02 -0.853 0.393592
## PC56 -2.840e-02 7.639e-02 -0.372 0.710088
## PC57 -1.473e-01 7.682e-02 -1.917 0.055277 .
## PC58 -1.075e-01 7.686e-02 -1.399 0.161973
## PC59 3.114e-01 7.691e-02 4.049 5.22e-05 ***
## PC60 -1.289e-01 7.680e-02 -1.678 0.093377 .
## PC61 4.419e-02 7.703e-02 0.574 0.566276
## PC62 -7.458e-02 7.749e-02 -0.962 0.335849
## PC63 -9.389e-02 7.790e-02 -1.205 0.228184
## PC64 -2.012e-01 7.842e-02 -2.566 0.010316 *
## PC65 2.832e-03 7.868e-02 0.036 0.971287
## PC66 -6.708e-02 7.975e-02 -0.841 0.400284
## PC67 -1.056e-02 7.957e-02 -0.133 0.894429
## PC68 2.440e-01 8.070e-02 3.023 0.002516 **
## PC69 1.354e-01 8.088e-02 1.675 0.094085 .
## PC70 3.295e-02 7.998e-02 0.412 0.680405
## PC71 1.313e-01 8.018e-02 1.637 0.101678
## PC72 -2.922e-02 8.155e-02 -0.358 0.720132
## PC73 1.128e-01 8.138e-02 1.386 0.165818
## PC74 -1.604e-03 8.174e-02 -0.020 0.984343
## PC75 -1.044e-01 8.160e-02 -1.279 0.200901
## PC76 -1.006e-01 8.203e-02 -1.226 0.220298
## PC77 1.535e-01 8.273e-02 1.856 0.063575 .
## PC78 -3.233e-02 8.284e-02 -0.390 0.696358
## PC79 1.489e-01 8.403e-02 1.772 0.076441 .
## PC80 -4.721e-02 8.317e-02 -0.568 0.570284
## PC81 2.585e-01 8.417e-02 3.071 0.002147 **
## PC82 6.161e-02 8.460e-02 0.728 0.466505
## PC83 -1.851e-01 8.523e-02 -2.172 0.029901 *
## PC84 2.278e-01 8.484e-02 2.686 0.007265 **
## PC85 4.218e-01 8.563e-02 4.927 8.63e-07 ***
## PC86 1.149e-02 8.551e-02 0.134 0.893133
## PC87 4.089e-01 8.631e-02 4.737 2.23e-06 ***
## PC88 -2.217e-01 8.712e-02 -2.545 0.010959 *
## PC89 -1.354e-01 8.731e-02 -1.551 0.120950
## PC90 -1.792e-01 8.648e-02 -2.072 0.038331 *
## PC91 2.582e-03 8.658e-02 0.030 0.976207
## PC92 1.286e-01 8.773e-02 1.466 0.142639
## PC93 -1.617e-01 8.827e-02 -1.832 0.067056 .
## PC94 -1.226e-01 8.822e-02 -1.390 0.164621
## PC95 3.811e-02 8.814e-02 0.432 0.665512
## PC96 -1.657e-01 8.891e-02 -1.863 0.062472 .
## PC97 -4.926e-02 8.898e-02 -0.554 0.579872
## PC98 -1.126e-01 8.831e-02 -1.275 0.202522
## PC99 -4.562e-02 8.818e-02 -0.517 0.604946
## PC100 -7.345e-02 8.908e-02 -0.825 0.409672
## PC101 -1.919e-01 8.886e-02 -2.159 0.030888 *
## PC102 -1.218e-01 8.941e-02 -1.363 0.173022
## PC103 1.038e-01 8.994e-02 1.154 0.248617
## PC104 -1.399e-01 8.947e-02 -1.564 0.117915
## PC105 1.565e-01 8.978e-02 1.743 0.081363 .
## PC106 2.343e-01 8.986e-02 2.607 0.009148 **
## PC107 5.481e-03 9.064e-02 0.060 0.951781
## PC108 8.837e-02 9.018e-02 0.980 0.327152
## PC109 -1.969e-02 9.090e-02 -0.217 0.828523
## PC110 -9.410e-02 9.036e-02 -1.041 0.297710
## PC111 -1.996e-01 9.095e-02 -2.195 0.028226 *
## PC112 -2.590e-02 9.058e-02 -0.286 0.774906
## PC113 6.579e-02 9.060e-02 0.726 0.467795
## PC114 -1.409e-01 9.108e-02 -1.547 0.121814
## PC115 -4.692e-01 9.130e-02 -5.140 2.86e-07 ***
## PC116 -1.894e-02 9.077e-02 -0.209 0.834745
## PC117 6.540e-04 9.108e-02 0.007 0.994271
## PC118 9.801e-02 9.105e-02 1.076 0.281790
## PC119 -1.983e-01 9.167e-02 -2.163 0.030586 *
## PC120 9.382e-02 9.158e-02 1.024 0.305677
## PC121 -1.873e-01 9.187e-02 -2.038 0.041569 *
## PC122 1.430e-01 9.292e-02 1.539 0.123894
## PC123 -2.310e-01 9.140e-02 -2.527 0.011520 *
## PC124 6.071e-02 9.207e-02 0.659 0.509678
## PC125 2.194e-01 9.287e-02 2.363 0.018168 *
## PC126 1.036e-01 9.276e-02 1.117 0.264109
## PC127 -4.902e-02 9.210e-02 -0.532 0.594567
## PC128 -1.509e-01 9.284e-02 -1.625 0.104202
## PC129 -8.941e-03 9.369e-02 -0.095 0.923975
## PC130 9.677e-02 9.305e-02 1.040 0.298415
## PC131 -1.550e-01 9.291e-02 -1.668 0.095309 .
## PC132 3.561e-02 9.255e-02 0.385 0.700426
## PC133 5.425e-02 9.304e-02 0.583 0.559879
## PC134 1.257e-01 9.459e-02 1.328 0.184083
## PC135 1.085e-01 9.369e-02 1.158 0.246992
## PC136 1.295e-01 9.354e-02 1.384 0.166366
## PC137 -1.720e-01 9.392e-02 -1.831 0.067150 .
## PC138 1.324e-01 9.367e-02 1.414 0.157482
## PC139 -1.842e-01 9.436e-02 -1.952 0.051013 .
## PC140 -8.253e-02 9.404e-02 -0.878 0.380174
## PC141 1.226e-01 9.412e-02 1.302 0.192885
## PC142 4.906e-02 9.475e-02 0.518 0.604650
## PC143 1.120e-01 9.432e-02 1.187 0.235191
## PC144 2.729e-01 9.545e-02 2.859 0.004266 **
## PC145 1.477e-01 9.537e-02 1.549 0.121470
## PC146 3.755e-01 9.501e-02 3.952 7.86e-05 ***
## PC147 4.275e-03 9.518e-02 0.045 0.964179
## PC148 -4.341e-02 9.531e-02 -0.455 0.648828
## PC149 3.865e-02 9.485e-02 0.407 0.683680
## PC150 1.232e-01 9.567e-02 1.287 0.198046
## PC151 1.201e-01 9.545e-02 1.258 0.208429
## PC152 -2.326e-02 9.544e-02 -0.244 0.807495
## PC153 8.612e-02 9.566e-02 0.900 0.368008
## PC154 -8.589e-02 9.650e-02 -0.890 0.373527
## PC155 1.911e-01 9.561e-02 1.999 0.045664 *
## PC156 1.394e-01 9.626e-02 1.448 0.147738
## PC157 9.014e-02 9.641e-02 0.935 0.349833
## PC158 -5.956e-02 9.611e-02 -0.620 0.535430
## PC159 3.276e-01 9.628e-02 3.403 0.000672 ***
## PC160 3.572e-02 9.681e-02 0.369 0.712132
## PC161 2.109e-02 9.597e-02 0.220 0.826088
## PC162 -4.138e-01 9.666e-02 -4.281 1.89e-05 ***
## PC163 3.029e-01 9.729e-02 3.113 0.001861 **
## PC164 6.891e-02 9.704e-02 0.710 0.477643
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.458 on 5153 degrees of freedom
## Multiple R-squared: 0.3409, Adjusted R-squared: 0.3199
## F-statistic: 16.25 on 164 and 5153 DF, p-value: < 2.2e-16
# Re-run influence diagnostics on the refitted (filtered) model.
cd.full2 = plot.diagnostics(model.full2, data.train2)
## [1] "Number of data points that have Cook's D > 4/n: 254"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before.
# Checking to see if distributions are different and if so which variables
# High Leverage Plot: compare the target's distribution between the
# high-influence ('High') rows and the remaining ('Normal') rows.
plotData = data.train %>%
rownames_to_column() %>%
mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
dplyr::select(type,target=one_of(label.names))
ggplot(data=plotData, aes(x=type,y=target)) +
geom_boxplot(fill='light blue',outlier.shape=NA) +
scale_y_continuous(name="Target Variable Values",label=scales::comma_format(accuracy=.1)) +
theme_light() +
ggtitle('Distribution of High Leverage Points and Normal Points')
# Two-sample t-tests: for each feature, test whether its mean differs
# between the high-influence ('High') rows and the remaining ('Normal')
# rows flagged above.
plotData = data.train %>%
rownames_to_column() %>%
mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
dplyr::select(type,one_of(feature.names))
# One equal-variance two-sample t-test per feature column, split by type.
comp.test = lapply(dplyr::select(plotData, one_of(feature.names))
, function(x) t.test(x ~ plotData$type, var.equal = TRUE))
# Keep only features whose group means differ at the 5% level.
sig.comp = list.filter(comp.test, p.value < 0.05)
# vapply (not sapply) so the result is guaranteed to be a named numeric
# vector even when sig.comp is empty (sapply would return an empty list).
vapply(sig.comp, function(x) x[['p.value']], numeric(1))
## PC1 PC6 PC11 PC23 PC24 PC25 PC26 PC33 PC41
## 0.0001072349 0.0058606124 0.0011053789 0.0011736711 0.0111317657 0.0009296367 0.0002676658 0.0409342966 0.0402243783
## PC43 PC44 PC46 PC76 PC97 PC131 PC138 PC159
## 0.0238913140 0.0009881893 0.0130897563 0.0233459332 0.0481640017 0.0051489325 0.0408374166 0.0385634058
# Box plots of only the significantly different features.
mm = melt(plotData, id=c('type')) %>% filter(variable %in% names(sig.comp))
ggplot(mm,aes(x=type, y=value)) +
geom_boxplot()+
facet_wrap(~variable, ncol=5, scales = 'free_y') +
scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
ggtitle('Distribution of High Leverage Points and Normal Points')
# Distribution (box) Plots of every feature, for reference.
mm = melt(plotData, id=c('type'))
ggplot(mm,aes(x=type, y=value)) +
geom_boxplot()+
facet_wrap(~variable, ncol=8, scales = 'free_y') +
scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
ggtitle('Distribution of High Leverage Points and Normal Points')
# Fit the null (grand-mean / intercept-only) model as the baseline that
# the selection algorithms below must improve on.
model.null = lm(grand.mean.formula, data.train)
summary(model.null)
##
## Call:
## lm(formula = grand.mean.formula, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.579 -7.113 -1.354 5.639 61.481
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.4914 0.1438 872.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.75 on 5583 degrees of freedom
References — basic regression in R: http://www.stat.columbia.edu/~martin/W2024/R10.pdf ; cross-validation and other model-selection metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/
# Forward stepwise selection (caret + leaps 'leapForward'), run only when
# the algo.forward.caret run parameter is set.
# isTRUE() is safer than `== TRUE`: it stays FALSE instead of erroring when
# the flag is NA or not a length-1 logical.
if (isTRUE(algo.forward.caret)) {
  set.seed(1)  # reproducible CV folds
  returned = train.caret.glmselect(formula = formula
                                   , data = data.train
                                   , method = "leapForward"
                                   , feature.names = feature.names)
  # Keep the fitted caret model and the selected subset size; both are
  # consumed by the test.model() call further down.
  model.forward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 25 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2 2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3 3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4 4 10.023439 0.13141280 7.641212 0.5019716 0.03605893 0.3239474
## 5 5 9.925468 0.14801576 7.552602 0.4996998 0.03658746 0.3075944
## 6 6 9.867988 0.15781946 7.492552 0.5218940 0.03947911 0.3374633
## 7 7 9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8 8 9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9 9 9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10 10 9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11 11 9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12 12 9.736668 0.17982775 7.409408 0.4927943 0.03807716 0.3166351
## 13 13 9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14 14 9.710142 0.18453084 7.386957 0.4937527 0.03943505 0.3209735
## 15 15 9.705682 0.18524759 7.383546 0.5000330 0.03877914 0.3263911
## 16 16 9.696500 0.18676637 7.377538 0.4970043 0.03730214 0.3148459
## 17 17 9.674323 0.19036045 7.355053 0.4863771 0.03640112 0.3026000
## 18 18 9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19 19 9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20 20 9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21 21 9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22 22 9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23 23 9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24 24 9.620150 0.19934080 7.312674 0.4714197 0.03361914 0.2939178
## 25 25 9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26 26 9.623708 0.19893673 7.317046 0.4714566 0.03434947 0.2956297
## 27 27 9.627039 0.19847483 7.320462 0.4801393 0.03458080 0.3061704
## 28 28 9.627234 0.19837556 7.318306 0.4690775 0.03282265 0.2991245
## 29 29 9.624637 0.19885758 7.316869 0.4657579 0.03222623 0.2977682
## 30 30 9.622027 0.19927591 7.315291 0.4712930 0.03298640 0.3037713
## 31 31 9.624224 0.19897155 7.324123 0.4717754 0.03316739 0.3064307
## 32 32 9.625179 0.19886894 7.325359 0.4758757 0.03426587 0.3167161
## 33 33 9.627267 0.19861554 7.324894 0.4830946 0.03504843 0.3216019
## 34 34 9.620108 0.19977267 7.323983 0.4837071 0.03464828 0.3242701
## 35 35 9.622769 0.19926429 7.322787 0.4781829 0.03300448 0.3240712
## 36 36 9.623398 0.19910784 7.318635 0.4793814 0.03311017 0.3251113
## 37 37 9.634104 0.19742675 7.324580 0.4823170 0.03288461 0.3265167
## 38 38 9.636863 0.19707855 7.326810 0.4842750 0.03258278 0.3218998
## 39 39 9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40 40 9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41 41 9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42 42 9.635209 0.19752633 7.331015 0.4786513 0.03230496 0.3176523
## 43 43 9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44 44 9.646134 0.19603882 7.342916 0.4790607 0.03211242 0.3144387
## 45 45 9.645160 0.19621505 7.344346 0.4810358 0.03235679 0.3151963
## 46 46 9.649825 0.19555997 7.344793 0.4827900 0.03250864 0.3180991
## 47 47 9.645080 0.19627428 7.344954 0.4824734 0.03243365 0.3170790
## 48 48 9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49 49 9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50 50 9.649302 0.19569868 7.350758 0.4810711 0.03144358 0.3152121
## 51 51 9.649499 0.19573827 7.349735 0.4888994 0.03202542 0.3186445
## 52 52 9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53 53 9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54 54 9.645082 0.19648076 7.345811 0.4808532 0.03131329 0.3170108
## 55 55 9.640925 0.19721429 7.343046 0.4849292 0.03188032 0.3212494
## 56 56 9.637013 0.19790079 7.338159 0.4904495 0.03268701 0.3247092
## 57 57 9.638109 0.19777097 7.340913 0.4898962 0.03226083 0.3221252
## 58 58 9.638407 0.19778974 7.339997 0.4858916 0.03179945 0.3205142
## 59 59 9.639943 0.19758818 7.337055 0.4827745 0.03170063 0.3188693
## 60 60 9.640663 0.19750436 7.336194 0.4815449 0.03186693 0.3185537
## 61 61 9.640124 0.19765083 7.337367 0.4833328 0.03179793 0.3186146
## 62 62 9.633940 0.19861069 7.331281 0.4818583 0.03151828 0.3182952
## 63 63 9.628424 0.19948669 7.327147 0.4847013 0.03192254 0.3182572
## 64 64 9.624632 0.20009907 7.326365 0.4819237 0.03189702 0.3192949
## 65 65 9.622465 0.20042856 7.325254 0.4798265 0.03170182 0.3157264
## 66 66 9.622955 0.20034898 7.325146 0.4816697 0.03204894 0.3181711
## 67 67 9.628533 0.19951967 7.328708 0.4777532 0.03161616 0.3146589
## 68 68 9.627700 0.19968550 7.327059 0.4790401 0.03161636 0.3143854
## 69 69 9.628646 0.19956177 7.328556 0.4758252 0.03096929 0.3131130
## 70 70 9.628704 0.19957924 7.331279 0.4744442 0.03061362 0.3089066
## 71 71 9.633963 0.19878019 7.334681 0.4743814 0.03081528 0.3079131
## 72 72 9.632865 0.19893976 7.334863 0.4731412 0.03041359 0.3077718
## 73 73 9.634444 0.19863871 7.335496 0.4738841 0.03074422 0.3128240
## 74 74 9.633199 0.19886446 7.332381 0.4697864 0.03016688 0.3109643
## 75 75 9.634434 0.19870236 7.332797 0.4713104 0.03034128 0.3128872
## 76 76 9.637916 0.19828939 7.334940 0.4708538 0.03065172 0.3126119
## 77 77 9.637252 0.19842780 7.334817 0.4729461 0.03100066 0.3128065
## 78 78 9.637472 0.19842780 7.336893 0.4744315 0.03110637 0.3152575
## 79 79 9.636680 0.19857333 7.336001 0.4804500 0.03160138 0.3196203
## 80 80 9.636986 0.19855864 7.334999 0.4818910 0.03186822 0.3213856
## 81 81 9.635285 0.19884976 7.333501 0.4824718 0.03204367 0.3210905
## 82 82 9.640748 0.19811068 7.337920 0.4810537 0.03205001 0.3203650
## 83 83 9.642481 0.19787745 7.338285 0.4834944 0.03230321 0.3211610
## 84 84 9.646272 0.19732857 7.341442 0.4872627 0.03254754 0.3251286
## 85 85 9.644749 0.19756028 7.339795 0.4868332 0.03254305 0.3271164
## 86 86 9.643731 0.19773806 7.339376 0.4869090 0.03252164 0.3281615
## 87 87 9.645909 0.19748384 7.342140 0.4880268 0.03265441 0.3268914
## 88 88 9.644757 0.19768174 7.341526 0.4880668 0.03294075 0.3269501
## 89 89 9.645023 0.19768281 7.341219 0.4899201 0.03297994 0.3266695
## 90 90 9.644678 0.19774921 7.339831 0.4899577 0.03319705 0.3252407
## 91 91 9.644409 0.19776139 7.337215 0.4893980 0.03319314 0.3242811
## 92 92 9.642856 0.19800183 7.334264 0.4885725 0.03310819 0.3250349
## 93 93 9.642154 0.19812880 7.332556 0.4915675 0.03349869 0.3271316
## 94 94 9.639872 0.19846016 7.331758 0.4902764 0.03325887 0.3255088
## 95 95 9.638005 0.19871153 7.332392 0.4882124 0.03298859 0.3226219
## 96 96 9.641738 0.19818132 7.335622 0.4891497 0.03311914 0.3255611
## 97 97 9.640631 0.19831772 7.335446 0.4872026 0.03269403 0.3251129
## 98 98 9.637355 0.19877654 7.334024 0.4868692 0.03264230 0.3256004
## 99 99 9.637689 0.19870467 7.334804 0.4846126 0.03246350 0.3247642
## 100 100 9.638953 0.19857125 7.335761 0.4870460 0.03286540 0.3285168
## 101 101 9.637922 0.19875072 7.335148 0.4869846 0.03279377 0.3262044
## 102 102 9.639737 0.19847845 7.334673 0.4869977 0.03270639 0.3269458
## 103 103 9.637043 0.19888613 7.333817 0.4860516 0.03252267 0.3258162
## 104 104 9.635660 0.19911137 7.331260 0.4844192 0.03227342 0.3244761
## 105 105 9.633810 0.19939515 7.327971 0.4857133 0.03246504 0.3241686
## 106 106 9.633238 0.19952111 7.328810 0.4861357 0.03266381 0.3246772
## 107 107 9.631306 0.19982553 7.325965 0.4871883 0.03302511 0.3251029
## 108 108 9.631423 0.19984389 7.325989 0.4907414 0.03353751 0.3268670
## 109 109 9.632987 0.19961834 7.326950 0.4920127 0.03363840 0.3277024
## 110 110 9.633933 0.19951000 7.329165 0.4929164 0.03372692 0.3285818
## 111 111 9.635282 0.19932179 7.330047 0.4923225 0.03373661 0.3272854
## 112 112 9.635311 0.19932662 7.330227 0.4921802 0.03375941 0.3269019
## 113 113 9.637317 0.19904490 7.332003 0.4932715 0.03390863 0.3271163
## 114 114 9.638101 0.19892513 7.332506 0.4935269 0.03387177 0.3264111
## 115 115 9.637307 0.19901663 7.332073 0.4936147 0.03385586 0.3265408
## 116 116 9.635376 0.19927917 7.330591 0.4918289 0.03358816 0.3255643
## 117 117 9.634994 0.19933702 7.331559 0.4910203 0.03348498 0.3254264
## 118 118 9.633779 0.19953113 7.331312 0.4946111 0.03394629 0.3266887
## 119 119 9.634948 0.19935903 7.332872 0.4942967 0.03411209 0.3263033
## 120 120 9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121 121 9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122 122 9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123 123 9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124 124 9.634449 0.19943503 7.331651 0.4931734 0.03398508 0.3245911
## 125 125 9.634523 0.19942949 7.331194 0.4919827 0.03404017 0.3240884
## 126 126 9.635073 0.19936421 7.330728 0.4922135 0.03405362 0.3234224
## 127 127 9.633802 0.19955709 7.329625 0.4936136 0.03422410 0.3241812
## 128 128 9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129 129 9.635138 0.19935852 7.330553 0.4939452 0.03415657 0.3254823
## 130 130 9.635835 0.19926628 7.331299 0.4944289 0.03422693 0.3263709
## 131 131 9.635837 0.19929391 7.332557 0.4956810 0.03448172 0.3281210
## 132 132 9.634573 0.19949616 7.331093 0.4966212 0.03468216 0.3283526
## 133 133 9.634159 0.19955455 7.330413 0.4957496 0.03465020 0.3280146
## 134 134 9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135 135 9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136 136 9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137 137 9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138 138 9.631808 0.19991432 7.331033 0.4934708 0.03437552 0.3256485
## 139 139 9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140 140 9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141 141 9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142 142 9.630717 0.20008648 7.330770 0.4922773 0.03410335 0.3243341
## 143 143 9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144 144 9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145 145 9.630944 0.20003653 7.330957 0.4930576 0.03407356 0.3250432
## 146 146 9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147 147 9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148 148 9.631653 0.19994064 7.331453 0.4931675 0.03406227 0.3242381
## 149 149 9.632159 0.19988227 7.331403 0.4929932 0.03407681 0.3241466
## 150 150 9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151 151 9.632196 0.19987376 7.331605 0.4930766 0.03398577 0.3242435
## 152 152 9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153 153 9.632308 0.19986587 7.331162 0.4935183 0.03404603 0.3245987
## 154 154 9.632400 0.19985752 7.331563 0.4933914 0.03403469 0.3246104
## 155 155 9.632458 0.19984612 7.331529 0.4936124 0.03404341 0.3247388
## 156 156 9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157 157 9.632222 0.19987341 7.331613 0.4936005 0.03404458 0.3244574
## 158 158 9.631946 0.19991440 7.331456 0.4936122 0.03404299 0.3245422
## 159 159 9.632034 0.19990167 7.331550 0.4934646 0.03404531 0.3245381
## 160 160 9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161 161 9.631809 0.19993444 7.331368 0.4937166 0.03408048 0.3245996
## 162 162 9.631782 0.19993549 7.331476 0.4936692 0.03406819 0.3244681
## 163 163 9.631765 0.19993663 7.331428 0.4936124 0.03405973 0.3244335
## 164 164 9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
## nvmax
## 25 25
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 125.53971849 125.28883081 125.79060618
## PC1 -0.13155296 -0.15340867 -0.10969725
## PC2 -0.26256899 -0.28454276 -0.24059521
## PC3 -0.12415708 -0.14631202 -0.10200214
## PC4 -0.09550221 -0.11805332 -0.07295111
## PC5 0.05224619 0.02900617 0.07548621
## PC7 -0.05076737 -0.07462249 -0.02691225
## PC11 -0.14811447 -0.17507997 -0.12114898
## PC12 -0.14975367 -0.17850496 -0.12100239
## PC13 0.08922680 0.06019911 0.11825449
## PC14 0.07847991 0.04839389 0.10856593
## PC16 0.10772985 0.07664904 0.13881065
## PC17 -0.06536087 -0.09794315 -0.03277859
## PC18 -0.11027784 -0.14449562 -0.07606006
## PC20 0.12375601 0.08610353 0.16140849
## PC24 -0.20796590 -0.29669653 -0.11923527
## PC32 -0.22627313 -0.35174041 -0.10080586
## PC34 0.34413952 0.20877870 0.47950034
## PC71 0.26455787 0.06967743 0.45943832
## PC85 0.34616931 0.13853396 0.55380465
## PC87 0.45096243 0.24053728 0.66138758
## PC115 -0.41723819 -0.63976712 -0.19470927
## PC131 -0.31769431 -0.54455701 -0.09083161
## PC144 0.37621373 0.14391357 0.60851390
## PC159 0.44676508 0.21161430 0.68191585
## PC162 -0.39020390 -0.62632711 -0.15408070
# Evaluate the forward-selection (leapForward) model on the held-out test set.
# NOTE(review): `transformation = t` presumably refers to a transformation
# object defined earlier in the script, not base::t — confirm upstream.
if (algo.forward.caret) {
  test.model(
    model          = model.forward,
    test           = data.test,
    method         = "leapForward",
    subopt         = NULL,
    formula        = formula,
    feature.names  = feature.names,
    label.names    = label.names,
    id             = id,
    draw.limits    = TRUE,
    transformation = t
  )
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 107.5 123.0 126.5 125.7 129.3 135.6
## [1] "leapForward Test MSE: 93.778539216204"
## [1] "leapForward Test RMSE: 9.68393201216345"
## [1] "leapForward Test MSE (Org Scale): 93.778539216204"
## [1] "leapForward Test RMSE (Org Scale): 9.68393201216345"
# Train a backward stepwise-selection model (leapBackward) through caret.
# The seed is fixed so cross-validation folds are reproducible across runs.
# `returned` and `id` stay at script scope; later chunks reuse them.
if (algo.backward.caret) {
  set.seed(1)
  returned <- train.caret.glmselect(
    formula       = formula,
    data          = data.train,
    method        = "leapBackward",
    feature.names = feature.names
  )
  model.backward <- returned$model
  id             <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 25 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2 2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3 3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4 4 10.023439 0.13141280 7.641212 0.5019716 0.03605893 0.3239474
## 5 5 9.925468 0.14801576 7.552602 0.4996998 0.03658746 0.3075944
## 6 6 9.867988 0.15781946 7.492552 0.5218940 0.03947911 0.3374633
## 7 7 9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8 8 9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9 9 9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10 10 9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11 11 9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12 12 9.736668 0.17982775 7.409408 0.4927943 0.03807716 0.3166351
## 13 13 9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14 14 9.710142 0.18453084 7.386957 0.4937527 0.03943505 0.3209735
## 15 15 9.705682 0.18524759 7.383546 0.5000330 0.03877914 0.3263911
## 16 16 9.696500 0.18676637 7.377538 0.4970043 0.03730214 0.3148459
## 17 17 9.674323 0.19036045 7.355053 0.4863771 0.03640112 0.3026000
## 18 18 9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19 19 9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20 20 9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21 21 9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22 22 9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23 23 9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24 24 9.620150 0.19934080 7.312674 0.4714197 0.03361914 0.2939178
## 25 25 9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26 26 9.623708 0.19893673 7.317046 0.4714566 0.03434947 0.2956297
## 27 27 9.627039 0.19847483 7.320462 0.4801393 0.03458080 0.3061704
## 28 28 9.627609 0.19831751 7.318180 0.4697247 0.03289685 0.2989354
## 29 29 9.623764 0.19899899 7.316502 0.4725575 0.03326723 0.2985276
## 30 30 9.619851 0.19962591 7.315416 0.4756054 0.03372327 0.3034597
## 31 31 9.624224 0.19897155 7.324123 0.4717754 0.03316739 0.3064307
## 32 32 9.625179 0.19886894 7.325359 0.4758757 0.03426587 0.3167161
## 33 33 9.627337 0.19861048 7.325259 0.4830518 0.03504374 0.3213584
## 34 34 9.626849 0.19863301 7.326118 0.4793046 0.03402334 0.3228997
## 35 35 9.625014 0.19891473 7.324586 0.4764846 0.03303804 0.3234589
## 36 36 9.627385 0.19852215 7.321155 0.4862141 0.03389284 0.3290649
## 37 37 9.634632 0.19734518 7.324639 0.4832365 0.03299993 0.3266106
## 38 38 9.636863 0.19707855 7.326810 0.4842750 0.03258278 0.3218998
## 39 39 9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40 40 9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41 41 9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42 42 9.635209 0.19752633 7.331015 0.4786513 0.03230496 0.3176523
## 43 43 9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44 44 9.643705 0.19642824 7.342231 0.4763202 0.03192094 0.3139294
## 45 45 9.639852 0.19705398 7.343501 0.4769665 0.03181261 0.3152953
## 46 46 9.642390 0.19671863 7.342911 0.4763663 0.03180864 0.3174432
## 47 47 9.642987 0.19659628 7.346236 0.4801213 0.03227406 0.3180657
## 48 48 9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49 49 9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50 50 9.649638 0.19562552 7.352013 0.4807571 0.03139963 0.3145837
## 51 51 9.648766 0.19583917 7.349302 0.4895776 0.03208614 0.3188607
## 52 52 9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53 53 9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54 54 9.645879 0.19638340 7.346858 0.4849839 0.03155713 0.3198481
## 55 55 9.641255 0.19713095 7.343260 0.4872314 0.03187938 0.3223013
## 56 56 9.634455 0.19823908 7.334609 0.4852839 0.03180882 0.3214321
## 57 57 9.638225 0.19774653 7.338815 0.4892439 0.03236620 0.3236202
## 58 58 9.639753 0.19755560 7.338282 0.4840383 0.03181736 0.3217224
## 59 59 9.641443 0.19733518 7.337813 0.4813345 0.03160042 0.3184130
## 60 60 9.637847 0.19794122 7.332749 0.4823624 0.03218830 0.3196055
## 61 61 9.637085 0.19813070 7.333834 0.4843272 0.03216323 0.3196899
## 62 62 9.630105 0.19922757 7.328405 0.4841952 0.03209729 0.3200003
## 63 63 9.626028 0.19986614 7.325545 0.4861569 0.03228542 0.3192122
## 64 64 9.624632 0.20009907 7.326365 0.4819237 0.03189702 0.3192949
## 65 65 9.623066 0.20034085 7.326416 0.4794706 0.03162154 0.3150573
## 66 66 9.623583 0.20025638 7.326371 0.4812940 0.03196354 0.3174405
## 67 67 9.628533 0.19951967 7.328708 0.4777532 0.03161616 0.3146589
## 68 68 9.626558 0.19982993 7.325933 0.4768326 0.03135079 0.3122173
## 69 69 9.627609 0.19967295 7.327638 0.4735192 0.03068204 0.3107170
## 70 70 9.628046 0.19963568 7.330722 0.4713702 0.03026589 0.3081748
## 71 71 9.633440 0.19884876 7.334766 0.4744917 0.03083888 0.3079760
## 72 72 9.630207 0.19935318 7.333459 0.4730032 0.03053585 0.3072335
## 73 73 9.634320 0.19866301 7.334429 0.4738796 0.03075226 0.3124372
## 74 74 9.632606 0.19896334 7.332290 0.4697531 0.03019562 0.3109295
## 75 75 9.633333 0.19887884 7.330988 0.4707002 0.03040032 0.3122666
## 76 76 9.637270 0.19839607 7.334204 0.4705131 0.03069367 0.3123891
## 77 77 9.637330 0.19842028 7.335480 0.4730939 0.03101370 0.3140298
## 78 78 9.637987 0.19834693 7.337129 0.4754113 0.03124856 0.3156944
## 79 79 9.634839 0.19888245 7.335009 0.4794442 0.03168884 0.3197409
## 80 80 9.634369 0.19898613 7.332618 0.4806072 0.03205410 0.3207875
## 81 81 9.635268 0.19887677 7.333434 0.4822420 0.03223080 0.3210465
## 82 82 9.640854 0.19809241 7.336798 0.4808563 0.03197181 0.3209463
## 83 83 9.642530 0.19785495 7.338418 0.4826313 0.03217259 0.3214175
## 84 84 9.645931 0.19737047 7.341670 0.4866171 0.03247379 0.3255516
## 85 85 9.643375 0.19777569 7.338913 0.4876278 0.03273128 0.3282442
## 86 86 9.642478 0.19794873 7.337956 0.4883664 0.03279266 0.3297883
## 87 87 9.644971 0.19762677 7.342382 0.4886839 0.03272002 0.3280370
## 88 88 9.643955 0.19779005 7.340143 0.4888782 0.03300201 0.3268896
## 89 89 9.646381 0.19748613 7.342141 0.4911933 0.03337987 0.3272014
## 90 90 9.644974 0.19770212 7.339921 0.4900193 0.03324822 0.3252611
## 91 91 9.645644 0.19759609 7.338008 0.4896470 0.03336991 0.3244592
## 92 92 9.643941 0.19784867 7.335431 0.4887986 0.03327433 0.3253143
## 93 93 9.642457 0.19808291 7.333022 0.4909757 0.03339273 0.3271385
## 94 94 9.638946 0.19861517 7.331746 0.4894207 0.03318396 0.3253670
## 95 95 9.637986 0.19871240 7.332079 0.4882037 0.03298901 0.3225894
## 96 96 9.641738 0.19818132 7.335622 0.4891497 0.03311914 0.3255611
## 97 97 9.640693 0.19830926 7.335751 0.4871465 0.03269158 0.3249365
## 98 98 9.638406 0.19861060 7.335546 0.4859267 0.03260109 0.3247582
## 99 99 9.638292 0.19860432 7.335371 0.4835742 0.03239127 0.3241332
## 100 100 9.637979 0.19871634 7.335049 0.4867231 0.03277228 0.3283375
## 101 101 9.637922 0.19875072 7.335148 0.4869846 0.03279377 0.3262044
## 102 102 9.639737 0.19847845 7.334673 0.4869977 0.03270639 0.3269458
## 103 103 9.636989 0.19891360 7.333854 0.4888500 0.03280641 0.3269784
## 104 104 9.635866 0.19910494 7.331034 0.4877309 0.03262011 0.3250461
## 105 105 9.634853 0.19925137 7.329637 0.4881192 0.03278720 0.3267913
## 106 106 9.633075 0.19954496 7.328999 0.4862240 0.03268191 0.3245915
## 107 107 9.631798 0.19974523 7.327250 0.4869244 0.03296552 0.3245389
## 108 108 9.632383 0.19971464 7.327464 0.4914903 0.03359949 0.3269074
## 109 109 9.633354 0.19957533 7.328699 0.4929301 0.03368659 0.3274432
## 110 110 9.634164 0.19946059 7.330328 0.4929124 0.03365076 0.3280303
## 111 111 9.635282 0.19932179 7.330047 0.4923225 0.03373661 0.3272854
## 112 112 9.635311 0.19932662 7.330227 0.4921802 0.03375941 0.3269019
## 113 113 9.636710 0.19913524 7.331110 0.4930719 0.03385221 0.3268784
## 114 114 9.637493 0.19901546 7.331498 0.4933230 0.03381400 0.3261352
## 115 115 9.637400 0.19899686 7.331122 0.4938071 0.03378569 0.3256036
## 116 116 9.636425 0.19913059 7.331039 0.4933183 0.03373932 0.3262363
## 117 117 9.636952 0.19906629 7.332437 0.4932250 0.03373071 0.3259437
## 118 118 9.634861 0.19936600 7.332259 0.4949759 0.03405248 0.3269493
## 119 119 9.634948 0.19935903 7.332872 0.4942967 0.03411209 0.3263033
## 120 120 9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121 121 9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122 122 9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123 123 9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124 124 9.635311 0.19933604 7.331776 0.4924463 0.03393819 0.3245462
## 125 125 9.634750 0.19940906 7.330999 0.4917910 0.03403052 0.3241592
## 126 126 9.635770 0.19927646 7.330790 0.4916262 0.03401231 0.3234002
## 127 127 9.634268 0.19949013 7.329844 0.4932195 0.03419209 0.3241022
## 128 128 9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129 129 9.635138 0.19935852 7.330553 0.4939452 0.03415657 0.3254823
## 130 130 9.635817 0.19926592 7.331557 0.4943953 0.03422760 0.3268632
## 131 131 9.635713 0.19931451 7.332509 0.4954391 0.03444387 0.3280296
## 132 132 9.634573 0.19949616 7.331093 0.4966212 0.03468216 0.3283526
## 133 133 9.634159 0.19955455 7.330413 0.4957496 0.03465020 0.3280146
## 134 134 9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135 135 9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136 136 9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137 137 9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138 138 9.631808 0.19991432 7.331033 0.4934708 0.03437552 0.3256485
## 139 139 9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140 140 9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141 141 9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142 142 9.630717 0.20008648 7.330770 0.4922773 0.03410335 0.3243341
## 143 143 9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144 144 9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145 145 9.630944 0.20003653 7.330957 0.4930576 0.03407356 0.3250432
## 146 146 9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147 147 9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148 148 9.631653 0.19994064 7.331453 0.4931675 0.03406227 0.3242381
## 149 149 9.632159 0.19988227 7.331403 0.4929932 0.03407681 0.3241466
## 150 150 9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151 151 9.632196 0.19987376 7.331605 0.4930766 0.03398577 0.3242435
## 152 152 9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153 153 9.632308 0.19986587 7.331162 0.4935183 0.03404603 0.3245987
## 154 154 9.632400 0.19985752 7.331563 0.4933914 0.03403469 0.3246104
## 155 155 9.632458 0.19984612 7.331529 0.4936124 0.03404341 0.3247388
## 156 156 9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157 157 9.632222 0.19987341 7.331613 0.4936005 0.03404458 0.3244574
## 158 158 9.631946 0.19991440 7.331456 0.4936122 0.03404299 0.3245422
## 159 159 9.632034 0.19990167 7.331550 0.4934646 0.03404531 0.3245381
## 160 160 9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161 161 9.631809 0.19993444 7.331368 0.4937166 0.03408048 0.3245996
## 162 162 9.631782 0.19993549 7.331476 0.4936692 0.03406819 0.3244681
## 163 163 9.631765 0.19993663 7.331428 0.4936124 0.03405973 0.3244335
## 164 164 9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
## nvmax
## 25 25
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 125.53752612 125.28664016 125.788412094
## PC1 -0.13174378 -0.15360057 -0.109886988
## PC2 -0.26265119 -0.28462520 -0.240677171
## PC3 -0.12392626 -0.14608133 -0.101771190
## PC4 -0.09605243 -0.11860394 -0.073500921
## PC5 0.05232279 0.02908298 0.075562607
## PC6 -0.03151566 -0.05479069 -0.008240638
## PC7 -0.05104896 -0.07490210 -0.027195809
## PC11 -0.14809027 -0.17505597 -0.121124574
## PC12 -0.14995002 -0.17870177 -0.121198282
## PC13 0.08936427 0.06033631 0.118392224
## PC14 0.07883159 0.04874545 0.108917730
## PC16 0.10772954 0.07664863 0.138810453
## PC17 -0.06539942 -0.09798185 -0.032816992
## PC18 -0.11065770 -0.14487675 -0.076438656
## PC20 0.12389080 0.08623812 0.161543488
## PC24 -0.20717929 -0.29591195 -0.118446630
## PC32 -0.22568123 -0.35114761 -0.100214855
## PC34 0.34242571 0.20706931 0.477782106
## PC85 0.34636392 0.13872842 0.553999423
## PC87 0.45277268 0.24235715 0.663188204
## PC115 -0.41521988 -0.63775861 -0.192681152
## PC131 -0.31671509 -0.54358089 -0.089849296
## PC144 0.37184913 0.13955224 0.604146014
## PC159 0.44155784 0.20640088 0.676714787
## PC162 -0.38798731 -0.62411302 -0.151861607
# Evaluate the backward-selection (leapBackward) model on the test set.
# NOTE(review): `transformation = t` presumably refers to a transformation
# object defined earlier in the script, not base::t — confirm upstream.
if (algo.backward.caret) {
  test.model(
    model.backward,
    data.test,
    method         = "leapBackward",
    subopt         = NULL,
    formula        = formula,
    feature.names  = feature.names,
    label.names    = label.names,
    id             = id,
    draw.limits    = TRUE,
    transformation = t
  )
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 107.2 122.9 126.5 125.7 129.3 135.8
## [1] "leapBackward Test MSE: 93.7076027896315"
## [1] "leapBackward Test RMSE: 9.68026873540355"
## [1] "leapBackward Test MSE (Org Scale): 93.7076027896315"
## [1] "leapBackward Test RMSE (Org Scale): 9.68026873540355"
# Train a stepwise-selection model (leapSeq) through caret.
# The seed is fixed so cross-validation folds are reproducible across runs.
# `returned` and `id` stay at script scope; later chunks reuse them.
if (algo.stepwise.caret) {
  set.seed(1)
  returned <- train.caret.glmselect(
    formula       = formula,
    data          = data.train,
    method        = "leapSeq",
    feature.names = feature.names
  )
  model.stepwise <- returned$model
  id             <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 35 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2 2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3 3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4 4 10.035485 0.12975376 7.639849 0.5046685 0.03739921 0.3292890
## 5 5 9.932844 0.14670423 7.558306 0.5039755 0.03809777 0.3114495
## 6 6 10.030405 0.13051774 7.635995 0.5076900 0.03591819 0.3354414
## 7 7 9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8 8 9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9 9 9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10 10 9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11 11 9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12 12 9.767298 0.17484789 7.434547 0.5281442 0.04312871 0.3371246
## 13 13 9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14 14 9.723740 0.18230840 7.395550 0.4649143 0.03431618 0.2982257
## 15 15 9.731958 0.18077727 7.407222 0.5078310 0.04139178 0.3310715
## 16 16 9.709285 0.18458215 7.376846 0.5096415 0.03979359 0.3281043
## 17 17 9.676890 0.19000806 7.351709 0.4866735 0.03635214 0.3012762
## 18 18 9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19 19 9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20 20 9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21 21 9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22 22 9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23 23 9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24 24 9.628839 0.19794973 7.316840 0.4829603 0.03485251 0.2975578
## 25 25 9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26 26 9.632674 0.19743285 7.327755 0.4799379 0.03477068 0.3097468
## 27 27 9.632822 0.19751978 7.328972 0.4687156 0.03259117 0.2848668
## 28 28 9.627234 0.19837556 7.318306 0.4690775 0.03282265 0.2991245
## 29 29 9.633097 0.19742420 7.326575 0.4703787 0.03321850 0.3017409
## 30 30 9.627875 0.19814296 7.331764 0.4685781 0.03308127 0.3034400
## 31 31 9.639787 0.19631180 7.332932 0.4549729 0.03218927 0.2876044
## 32 32 9.629992 0.19810176 7.325367 0.4762382 0.03409035 0.3167192
## 33 33 9.624376 0.19933192 7.329465 0.4849616 0.03577901 0.3188442
## 34 34 9.624356 0.19903069 7.326110 0.4812036 0.03399530 0.3229024
## 35 35 9.613278 0.20055832 7.322700 0.4770978 0.03339960 0.3192462
## 36 36 9.629847 0.19795599 7.320346 0.4747992 0.03336160 0.3245736
## 37 37 9.634632 0.19734518 7.324639 0.4832365 0.03299993 0.3266106
## 38 38 9.632323 0.19770234 7.322009 0.4765628 0.03174522 0.3144365
## 39 39 9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40 40 9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41 41 9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42 42 9.631121 0.19815246 7.326538 0.4820721 0.03236611 0.3196006
## 43 43 9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44 44 9.653638 0.19469807 7.345508 0.4730572 0.03215679 0.3132738
## 45 45 9.644565 0.19626995 7.344135 0.4816156 0.03239358 0.3153108
## 46 46 9.647166 0.19597772 7.342676 0.4817311 0.03217179 0.3172612
## 47 47 9.641219 0.19639501 7.344418 0.4793462 0.03190739 0.3177120
## 48 48 9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49 49 9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50 50 9.649302 0.19569868 7.350758 0.4810711 0.03144358 0.3152121
## 51 51 9.648766 0.19583917 7.349302 0.4895776 0.03208614 0.3188607
## 52 52 9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53 53 9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54 54 9.657675 0.19429043 7.351963 0.4699501 0.03105085 0.3140519
## 55 55 9.640991 0.19709526 7.343983 0.4848072 0.03164436 0.3190550
## 56 56 9.633323 0.19843090 7.335343 0.4863930 0.03189816 0.3210032
## 57 57 9.634674 0.19764783 7.333981 0.4812958 0.03234191 0.3089316
## 58 58 9.645476 0.19656866 7.333898 0.4979858 0.03345372 0.3193452
## 59 59 9.638123 0.19806877 7.341765 0.4839277 0.03220295 0.3160701
## 60 60 9.640663 0.19750436 7.336194 0.4815449 0.03186693 0.3185537
## 61 61 9.632959 0.19853639 7.328617 0.4711072 0.03053782 0.3064884
## 62 62 9.650786 0.19575846 7.343055 0.4676786 0.03133202 0.3124863
## 63 63 9.626028 0.19986614 7.325545 0.4861569 0.03228542 0.3192122
## 64 64 9.619048 0.20125257 7.327606 0.4855103 0.03312232 0.3186253
## 65 65 9.622465 0.20042856 7.325254 0.4798265 0.03170182 0.3157264
## 66 66 9.623583 0.20025638 7.326371 0.4812940 0.03196354 0.3174405
## 67 67 9.625210 0.20027783 7.332145 0.4799278 0.03243753 0.3126892
## 68 68 9.627700 0.19968550 7.327059 0.4790401 0.03161636 0.3143854
## 69 69 9.627498 0.19970770 7.327376 0.4736241 0.03070443 0.3108537
## 70 70 9.627408 0.19973652 7.330858 0.4719680 0.03032860 0.3081055
## 71 71 9.633821 0.19879325 7.334645 0.4745140 0.03082342 0.3079312
## 72 72 9.633054 0.19840440 7.335435 0.4729647 0.03012478 0.3075016
## 73 73 9.633220 0.19885323 7.334183 0.4732130 0.03081773 0.3124056
## 74 74 9.632606 0.19896334 7.332290 0.4697531 0.03019562 0.3109295
## 75 75 9.643045 0.19722997 7.331824 0.4726842 0.03031813 0.3125013
## 76 76 9.637270 0.19839607 7.334204 0.4705131 0.03069367 0.3123891
## 77 77 9.651117 0.19605608 7.343962 0.4623608 0.03142971 0.3092341
## 78 78 9.637920 0.19836162 7.336733 0.4753775 0.03125471 0.3155869
## 79 79 9.634738 0.19903145 7.340632 0.4794420 0.03182018 0.3167133
## 80 80 9.628402 0.19961485 7.327466 0.4660926 0.03012935 0.3079969
## 81 81 9.634214 0.19915641 7.338179 0.4822087 0.03226571 0.3179727
## 82 82 9.644343 0.19763083 7.345891 0.4806156 0.03166716 0.3172938
## 83 83 9.642862 0.19781385 7.338282 0.4832640 0.03224512 0.3211627
## 84 84 9.646473 0.19735186 7.348114 0.4862948 0.03245701 0.3226072
## 85 85 9.644749 0.19756028 7.339795 0.4868332 0.03254305 0.3271164
## 86 86 9.646077 0.19729338 7.335464 0.4930609 0.03343510 0.3274167
## 87 87 9.645155 0.19762625 7.347180 0.4880537 0.03272303 0.3257065
## 88 88 9.639757 0.19826580 7.334796 0.4916190 0.03304397 0.3232741
## 89 89 9.646705 0.19744152 7.342021 0.4913420 0.03335946 0.3271849
## 90 90 9.660434 0.19516634 7.350607 0.4778613 0.03334719 0.3208404
## 91 91 9.643725 0.19789868 7.340807 0.4907398 0.03361582 0.3231447
## 92 92 9.643155 0.19779708 7.338427 0.4902939 0.03316932 0.3182344
## 93 93 9.642154 0.19812880 7.332556 0.4915675 0.03349869 0.3271316
## 94 94 9.638975 0.19861292 7.331924 0.4894331 0.03318281 0.3253844
## 95 95 9.642294 0.19778840 7.327382 0.4928829 0.03353639 0.3200677
## 96 96 9.638688 0.19829174 7.338738 0.4811441 0.03118662 0.3165684
## 97 97 9.641035 0.19809478 7.331646 0.4872268 0.03264268 0.3241087
## 98 98 9.637950 0.19867922 7.334649 0.4861633 0.03264987 0.3251743
## 99 99 9.638709 0.19854400 7.335793 0.4837175 0.03243174 0.3242451
## 100 100 9.647845 0.19707847 7.345731 0.4790327 0.03358016 0.3243687
## 101 101 9.634841 0.19905561 7.336650 0.4860725 0.03261444 0.3266205
## 102 102 9.644171 0.19777240 7.336937 0.4892410 0.03245244 0.3272985
## 103 103 9.637043 0.19888613 7.333817 0.4860516 0.03252267 0.3258162
## 104 104 9.644544 0.19759412 7.342168 0.4880178 0.03442113 0.3290760
## 105 105 9.648470 0.19700612 7.339022 0.4780763 0.03309997 0.3234181
## 106 106 9.626111 0.20025674 7.318176 0.4701244 0.03084190 0.3068369
## 107 107 9.641499 0.19812141 7.333168 0.4795677 0.03324143 0.3225631
## 108 108 9.625872 0.20075685 7.323937 0.4940935 0.03435193 0.3278834
## 109 109 9.634808 0.19935575 7.329201 0.4917019 0.03365578 0.3272076
## 110 110 9.636990 0.19899400 7.338645 0.4987768 0.03619888 0.3338182
## 111 111 9.640986 0.19793926 7.339657 0.4890052 0.03413145 0.3239878
## 112 112 9.644118 0.19791443 7.341766 0.4947524 0.03551866 0.3317083
## 113 113 9.637317 0.19904490 7.332003 0.4932715 0.03390863 0.3271163
## 114 114 9.637493 0.19901546 7.331498 0.4933230 0.03381400 0.3261352
## 115 115 9.647355 0.19750109 7.342614 0.4965370 0.03577645 0.3310894
## 116 116 9.628669 0.20016285 7.326723 0.4905149 0.03317235 0.3251674
## 117 117 9.636408 0.19914600 7.331753 0.4930449 0.03368017 0.3257613
## 118 118 9.634861 0.19936600 7.332259 0.4949759 0.03405248 0.3269493
## 119 119 9.630940 0.19980799 7.331596 0.4931298 0.03385856 0.3259838
## 120 120 9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121 121 9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122 122 9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123 123 9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124 124 9.632594 0.19954532 7.329751 0.4926203 0.03392131 0.3241588
## 125 125 9.634750 0.19940906 7.330999 0.4917910 0.03403052 0.3241592
## 126 126 9.631275 0.19999527 7.331469 0.4942490 0.03460944 0.3230693
## 127 127 9.626061 0.20042196 7.329419 0.5007588 0.03475107 0.3242564
## 128 128 9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129 129 9.631418 0.19947836 7.333752 0.4935846 0.03315973 0.3177274
## 130 130 9.636154 0.19905206 7.331498 0.4945305 0.03435957 0.3264199
## 131 131 9.642816 0.19820577 7.335905 0.5042671 0.03537116 0.3313577
## 132 132 9.629037 0.19996286 7.326142 0.4731106 0.03131379 0.3081658
## 133 133 9.625251 0.20068170 7.322689 0.4788724 0.03271940 0.3139728
## 134 134 9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135 135 9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136 136 9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137 137 9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138 138 9.621678 0.20118880 7.327529 0.5028781 0.03517881 0.3270326
## 139 139 9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140 140 9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141 141 9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142 142 9.642788 0.19791308 7.339045 0.4868270 0.03432553 0.3228355
## 143 143 9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144 144 9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145 145 9.626272 0.20052867 7.326845 0.4804015 0.03230230 0.3110429
## 146 146 9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147 147 9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148 148 9.633109 0.19971418 7.334493 0.4937935 0.03394853 0.3244833
## 149 149 9.627775 0.20031032 7.327099 0.5091607 0.03566012 0.3294421
## 150 150 9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151 151 9.634510 0.19951189 7.334595 0.4940863 0.03380899 0.3244866
## 152 152 9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153 153 9.634018 0.19960311 7.333678 0.4942584 0.03391589 0.3247875
## 154 154 9.627332 0.20050814 7.328804 0.4979133 0.03440849 0.3257559
## 155 155 9.627752 0.20033229 7.326627 0.4845444 0.03316572 0.3156675
## 156 156 9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157 157 9.631847 0.19981085 7.336929 0.4953112 0.03454456 0.3258353
## 158 158 9.632576 0.19969635 7.332822 0.4948467 0.03445118 0.3271549
## 159 159 9.637735 0.19902471 7.336996 0.4890297 0.03405738 0.3226827
## 160 160 9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161 161 9.632377 0.19984984 7.331586 0.4938375 0.03417048 0.3246630
## 162 162 9.636927 0.19921567 7.335198 0.4917200 0.03433715 0.3243667
## 163 163 9.630493 0.20013581 7.329566 0.4939151 0.03416016 0.3244173
## 164 164 9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
## nvmax
## 35 35
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 125.54332383 125.29362862 125.793019041
## PC1 -0.13107191 -0.15282709 -0.109316740
## PC2 -0.26143362 -0.28330665 -0.239560590
## PC3 -0.12353972 -0.14559136 -0.101488091
## PC4 -0.09593267 -0.11837892 -0.073486433
## PC5 0.05160143 0.02846952 0.074733329
## PC6 -0.03163802 -0.05480317 -0.008472871
## PC7 -0.04906467 -0.07281142 -0.025317927
## PC11 -0.14774898 -0.17458577 -0.120912196
## PC12 -0.14961413 -0.17822848 -0.120999770
## PC13 0.08898327 0.06009396 0.117872582
## PC14 0.08014441 0.05019583 0.110092985
## PC16 0.10817657 0.07724294 0.139110211
## PC17 -0.06639171 -0.09881922 -0.033964189
## PC18 -0.10980108 -0.14385685 -0.075745315
## PC20 0.12353678 0.08606610 0.161007456
## PC24 -0.20854604 -0.29687258 -0.120219492
## PC29 0.15284133 0.03938205 0.266300606
## PC32 -0.22568379 -0.35057007 -0.100797519
## PC34 0.34476425 0.21004354 0.479484960
## PC59 0.25147217 0.06760204 0.435342306
## PC64 -0.23383042 -0.42233991 -0.045320920
## PC68 0.25728527 0.06318426 0.451386288
## PC71 0.26255777 0.06858327 0.456532272
## PC83 -0.25700407 -0.46206759 -0.051940554
## PC85 0.35254319 0.14589681 0.559189573
## PC87 0.44709472 0.23767593 0.656513505
## PC106 0.30169008 0.08328181 0.520098344
## PC115 -0.41081054 -0.63229474 -0.189326348
## PC123 -0.25214503 -0.47416307 -0.030126988
## PC131 -0.30740473 -0.53322038 -0.081589075
## PC144 0.37345939 0.14227856 0.604640227
## PC146 0.28367384 0.05218695 0.515160736
## PC159 0.45444257 0.22038775 0.688497398
## PC162 -0.39209913 -0.62710258 -0.157095672
## PC163 0.32150076 0.08577379 0.557227726
# Evaluate the stepwise-selection (leapSeq) model on the test set.
# NOTE(review): `transformation = t` presumably refers to a transformation
# object defined earlier in the script, not base::t — confirm upstream.
if (algo.stepwise.caret) {
  test.model(
    model.stepwise,
    data.test,
    method         = "leapSeq",
    subopt         = NULL,
    formula        = formula,
    feature.names  = feature.names,
    label.names    = label.names,
    id             = id,
    draw.limits    = TRUE,
    transformation = t
  )
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 106.0 122.9 126.4 125.8 129.3 136.5
## [1] "leapSeq Test MSE: 94.1366795585845"
## [1] "leapSeq Test RMSE: 9.70240586445365"
## [1] "leapSeq Test MSE (Org Scale): 94.1366795585845"
## [1] "leapSeq Test RMSE (Org Scale): 9.70240586445365"
# Train a LASSO model (glmnet with alpha = 1) through caret, tuning lambda
# over a log-spaced grid of 100 values from 1e-4 to 1e-2. The seed is fixed
# so cross-validation folds are reproducible across runs.
if (algo.LASSO.caret) {
  set.seed(1)
  # Fix: `length = 100` relied on partial argument matching for seq()'s
  # `length.out`; spelled out explicitly (same grid, safer code).
  tune.grid <- expand.grid(
    alpha  = 1,
    lambda = 10^seq(from = -4, to = -2, length.out = 100)
  )
  returned <- train.caret.glmselect(
    formula       = formula,
    data          = data.train,
    method        = "glmnet",
    subopt        = "LASSO",
    tune.grid     = tune.grid,
    feature.names = feature.names
  )
  model.LASSO.caret <- returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.01 on full training set
## glmnet
##
## 5584 samples
## 164 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0001000000 9.628757 0.2001968 7.328930
## 0.0001047616 9.628757 0.2001968 7.328930
## 0.0001097499 9.628757 0.2001968 7.328930
## 0.0001149757 9.628757 0.2001968 7.328930
## 0.0001204504 9.628757 0.2001968 7.328930
## 0.0001261857 9.628757 0.2001968 7.328930
## 0.0001321941 9.628757 0.2001968 7.328930
## 0.0001384886 9.628757 0.2001968 7.328930
## 0.0001450829 9.628757 0.2001968 7.328930
## 0.0001519911 9.628757 0.2001968 7.328930
## 0.0001592283 9.628757 0.2001968 7.328930
## 0.0001668101 9.628757 0.2001968 7.328930
## 0.0001747528 9.628757 0.2001968 7.328930
## 0.0001830738 9.628757 0.2001968 7.328930
## 0.0001917910 9.628757 0.2001968 7.328930
## 0.0002009233 9.628757 0.2001968 7.328930
## 0.0002104904 9.628757 0.2001968 7.328930
## 0.0002205131 9.628757 0.2001968 7.328930
## 0.0002310130 9.628757 0.2001968 7.328930
## 0.0002420128 9.628757 0.2001968 7.328930
## 0.0002535364 9.628757 0.2001968 7.328930
## 0.0002656088 9.628757 0.2001968 7.328930
## 0.0002782559 9.628757 0.2001968 7.328930
## 0.0002915053 9.628757 0.2001968 7.328930
## 0.0003053856 9.628757 0.2001968 7.328930
## 0.0003199267 9.628757 0.2001968 7.328930
## 0.0003351603 9.628757 0.2001968 7.328930
## 0.0003511192 9.628757 0.2001968 7.328930
## 0.0003678380 9.628757 0.2001968 7.328930
## 0.0003853529 9.628757 0.2001968 7.328930
## 0.0004037017 9.628757 0.2001968 7.328930
## 0.0004229243 9.628757 0.2001968 7.328930
## 0.0004430621 9.628757 0.2001968 7.328930
## 0.0004641589 9.628757 0.2001968 7.328930
## 0.0004862602 9.628757 0.2001968 7.328930
## 0.0005094138 9.628757 0.2001968 7.328930
## 0.0005336699 9.628757 0.2001968 7.328930
## 0.0005590810 9.628757 0.2001968 7.328930
## 0.0005857021 9.628757 0.2001968 7.328930
## 0.0006135907 9.628757 0.2001968 7.328930
## 0.0006428073 9.628757 0.2001968 7.328930
## 0.0006734151 9.628757 0.2001968 7.328930
## 0.0007054802 9.628757 0.2001968 7.328930
## 0.0007390722 9.628757 0.2001968 7.328930
## 0.0007742637 9.628757 0.2001968 7.328930
## 0.0008111308 9.628757 0.2001968 7.328930
## 0.0008497534 9.628757 0.2001968 7.328930
## 0.0008902151 9.628757 0.2001968 7.328930
## 0.0009326033 9.628757 0.2001968 7.328930
## 0.0009770100 9.628757 0.2001968 7.328930
## 0.0010235310 9.628757 0.2001968 7.328930
## 0.0010722672 9.628757 0.2001968 7.328930
## 0.0011233240 9.628757 0.2001968 7.328930
## 0.0011768120 9.628757 0.2001968 7.328930
## 0.0012328467 9.628757 0.2001968 7.328930
## 0.0012915497 9.628757 0.2001968 7.328930
## 0.0013530478 9.628757 0.2001968 7.328930
## 0.0014174742 9.628757 0.2001968 7.328930
## 0.0014849683 9.628757 0.2001968 7.328930
## 0.0015556761 9.628757 0.2001968 7.328930
## 0.0016297508 9.628757 0.2001968 7.328930
## 0.0017073526 9.628757 0.2001968 7.328930
## 0.0017886495 9.628757 0.2001968 7.328930
## 0.0018738174 9.628757 0.2001968 7.328930
## 0.0019630407 9.628757 0.2001968 7.328930
## 0.0020565123 9.628757 0.2001968 7.328930
## 0.0021544347 9.628757 0.2001968 7.328930
## 0.0022570197 9.628757 0.2001968 7.328930
## 0.0023644894 9.628757 0.2001968 7.328930
## 0.0024770764 9.628757 0.2001968 7.328930
## 0.0025950242 9.628757 0.2001968 7.328930
## 0.0027185882 9.628757 0.2001968 7.328930
## 0.0028480359 9.628693 0.2002031 7.328875
## 0.0029836472 9.628553 0.2002156 7.328759
## 0.0031257158 9.628403 0.2002287 7.328635
## 0.0032745492 9.628247 0.2002424 7.328504
## 0.0034304693 9.628085 0.2002565 7.328369
## 0.0035938137 9.627915 0.2002713 7.328227
## 0.0037649358 9.627738 0.2002868 7.328079
## 0.0039442061 9.627553 0.2003030 7.327924
## 0.0041320124 9.627360 0.2003198 7.327763
## 0.0043287613 9.627160 0.2003372 7.327594
## 0.0045348785 9.626950 0.2003556 7.327418
## 0.0047508102 9.626729 0.2003750 7.327232
## 0.0049770236 9.626498 0.2003953 7.327037
## 0.0052140083 9.626256 0.2004167 7.326834
## 0.0054622772 9.626004 0.2004391 7.326621
## 0.0057223677 9.625741 0.2004623 7.326399
## 0.0059948425 9.625467 0.2004865 7.326172
## 0.0062802914 9.625182 0.2005117 7.325936
## 0.0065793322 9.624884 0.2005381 7.325693
## 0.0068926121 9.624575 0.2005655 7.325444
## 0.0072208090 9.624251 0.2005943 7.325182
## 0.0075646333 9.623915 0.2006242 7.324914
## 0.0079248290 9.623564 0.2006556 7.324634
## 0.0083021757 9.623200 0.2006881 7.324345
## 0.0086974900 9.622821 0.2007222 7.324043
## 0.0091116276 9.622428 0.2007573 7.323732
## 0.0095454846 9.622019 0.2007940 7.323405
## 0.0100000000 9.621592 0.2008325 7.323063
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.01.
## alpha lambda
## 100 1 0.01
## alpha lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.0001000000 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 2 1 0.0001047616 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 3 1 0.0001097499 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 4 1 0.0001149757 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 5 1 0.0001204504 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 6 1 0.0001261857 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 7 1 0.0001321941 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 8 1 0.0001384886 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 9 1 0.0001450829 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 10 1 0.0001519911 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 11 1 0.0001592283 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 12 1 0.0001668101 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 13 1 0.0001747528 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 14 1 0.0001830738 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 15 1 0.0001917910 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 16 1 0.0002009233 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 17 1 0.0002104904 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 18 1 0.0002205131 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 19 1 0.0002310130 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 20 1 0.0002420128 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 21 1 0.0002535364 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 22 1 0.0002656088 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 23 1 0.0002782559 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 24 1 0.0002915053 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 25 1 0.0003053856 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 26 1 0.0003199267 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 27 1 0.0003351603 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 28 1 0.0003511192 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 29 1 0.0003678380 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 30 1 0.0003853529 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 31 1 0.0004037017 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 32 1 0.0004229243 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 33 1 0.0004430621 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 34 1 0.0004641589 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 35 1 0.0004862602 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 36 1 0.0005094138 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 37 1 0.0005336699 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 38 1 0.0005590810 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 39 1 0.0005857021 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 40 1 0.0006135907 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 41 1 0.0006428073 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 42 1 0.0006734151 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 43 1 0.0007054802 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 44 1 0.0007390722 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 45 1 0.0007742637 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 46 1 0.0008111308 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 47 1 0.0008497534 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 48 1 0.0008902151 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 49 1 0.0009326033 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 50 1 0.0009770100 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 51 1 0.0010235310 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 52 1 0.0010722672 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 53 1 0.0011233240 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 54 1 0.0011768120 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 55 1 0.0012328467 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 56 1 0.0012915497 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 57 1 0.0013530478 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 58 1 0.0014174742 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 59 1 0.0014849683 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 60 1 0.0015556761 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 61 1 0.0016297508 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 62 1 0.0017073526 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 63 1 0.0017886495 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 64 1 0.0018738174 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 65 1 0.0019630407 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 66 1 0.0020565123 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 67 1 0.0021544347 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 68 1 0.0022570197 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 69 1 0.0023644894 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 70 1 0.0024770764 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 71 1 0.0025950242 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 72 1 0.0027185882 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 73 1 0.0028480359 9.628693 0.2002031 7.328875 0.4933233 0.03406540 0.3244276
## 74 1 0.0029836472 9.628553 0.2002156 7.328759 0.4933087 0.03406549 0.3244242
## 75 1 0.0031257158 9.628403 0.2002287 7.328635 0.4932924 0.03406551 0.3244197
## 76 1 0.0032745492 9.628247 0.2002424 7.328504 0.4932744 0.03406542 0.3244144
## 77 1 0.0034304693 9.628085 0.2002565 7.328369 0.4932573 0.03406547 0.3244099
## 78 1 0.0035938137 9.627915 0.2002713 7.328227 0.4932391 0.03406544 0.3244062
## 79 1 0.0037649358 9.627738 0.2002868 7.328079 0.4932205 0.03406546 0.3244025
## 80 1 0.0039442061 9.627553 0.2003030 7.327924 0.4932028 0.03406567 0.3243998
## 81 1 0.0041320124 9.627360 0.2003198 7.327763 0.4931839 0.03406580 0.3243975
## 82 1 0.0043287613 9.627160 0.2003372 7.327594 0.4931651 0.03406603 0.3243958
## 83 1 0.0045348785 9.626950 0.2003556 7.327418 0.4931446 0.03406625 0.3243935
## 84 1 0.0047508102 9.626729 0.2003750 7.327232 0.4931225 0.03406655 0.3243908
## 85 1 0.0049770236 9.626498 0.2003953 7.327037 0.4930993 0.03406691 0.3243872
## 86 1 0.0052140083 9.626256 0.2004167 7.326834 0.4930751 0.03406735 0.3243795
## 87 1 0.0054622772 9.626004 0.2004391 7.326621 0.4930500 0.03406782 0.3243713
## 88 1 0.0057223677 9.625741 0.2004623 7.326399 0.4930236 0.03406829 0.3243602
## 89 1 0.0059948425 9.625467 0.2004865 7.326172 0.4929945 0.03406850 0.3243407
## 90 1 0.0062802914 9.625182 0.2005117 7.325936 0.4929638 0.03406869 0.3243219
## 91 1 0.0065793322 9.624884 0.2005381 7.325693 0.4929281 0.03406849 0.3242989
## 92 1 0.0068926121 9.624575 0.2005655 7.325444 0.4928909 0.03406845 0.3242754
## 93 1 0.0072208090 9.624251 0.2005943 7.325182 0.4928511 0.03406846 0.3242504
## 94 1 0.0075646333 9.623915 0.2006242 7.324914 0.4928092 0.03406875 0.3242255
## 95 1 0.0079248290 9.623564 0.2006556 7.324634 0.4927654 0.03406918 0.3241992
## 96 1 0.0083021757 9.623200 0.2006881 7.324345 0.4927197 0.03406987 0.3241664
## 97 1 0.0086974900 9.622821 0.2007222 7.324043 0.4926739 0.03407112 0.3241289
## 98 1 0.0091116276 9.622428 0.2007573 7.323732 0.4926238 0.03407250 0.3240907
## 99 1 0.0095454846 9.622019 0.2007940 7.323405 0.4925726 0.03407415 0.3240515
## 100 1 0.0100000000 9.621592 0.2008325 7.323063 0.4925203 0.03407606 0.3240121
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients"
## model.coef
## (Intercept) 1.255514e+02
## PC1 -1.298723e-01
## PC2 -2.600514e-01
## PC3 -1.234301e-01
## PC4 -9.568324e-02
## PC5 5.082205e-02
## PC6 -3.161405e-02
## PC7 -4.871874e-02
## PC8 -1.182496e-02
## PC9 -1.231822e-02
## PC10 -5.376536e-03
## PC11 -1.452548e-01
## PC12 -1.476409e-01
## PC13 8.763595e-02
## PC14 7.771212e-02
## PC15 -5.684620e-03
## PC16 1.060543e-01
## PC17 -6.495879e-02
## PC18 -1.100534e-01
## PC19 1.238598e-02
## PC20 1.216020e-01
## PC21 2.235409e-02
## PC22 3.451018e-02
## PC23 6.763238e-02
## PC24 -2.108389e-01
## PC25 2.832458e-02
## PC26 9.670610e-02
## PC27 9.073414e-02
## PC28 3.915594e-02
## PC29 1.469626e-01
## PC30 1.525033e-02
## PC31 -5.692113e-02
## PC32 -2.210111e-01
## PC33 7.002469e-02
## PC34 3.433023e-01
## PC36 -7.127674e-03
## PC37 -1.162535e-01
## PC38 -8.295870e-05
## PC39 -4.647233e-02
## PC40 -1.028411e-01
## PC41 2.063495e-02
## PC42 -4.918385e-02
## PC44 2.320489e-02
## PC45 -1.070338e-02
## PC46 1.164440e-01
## PC47 -1.225217e-01
## PC48 1.860308e-02
## PC49 -1.655462e-02
## PC50 -8.590795e-02
## PC51 1.719985e-02
## PC52 -1.654400e-02
## PC53 4.646695e-02
## PC54 -4.027648e-02
## PC55 1.946765e-02
## PC57 -1.514104e-01
## PC58 -2.042308e-02
## PC59 2.421222e-01
## PC60 -9.946129e-02
## PC61 9.710888e-02
## PC62 -1.149038e-01
## PC63 -1.013142e-01
## PC64 -2.263178e-01
## PC65 -3.519116e-02
## PC66 -1.320295e-01
## PC67 -2.937963e-02
## PC68 2.631703e-01
## PC69 1.077511e-01
## PC70 -1.171616e-02
## PC71 2.494247e-01
## PC72 -5.314494e-04
## PC73 5.268753e-02
## PC74 -9.890600e-02
## PC75 -1.938554e-01
## PC76 9.764328e-03
## PC77 1.542149e-01
## PC78 4.919469e-02
## PC79 1.109009e-01
## PC80 -8.258235e-02
## PC81 2.061036e-01
## PC82 1.013928e-01
## PC83 -2.413708e-01
## PC84 2.100872e-01
## PC85 3.434338e-01
## PC86 -7.652314e-02
## PC87 4.457219e-01
## PC88 -2.006078e-01
## PC89 -1.901147e-01
## PC90 -1.805138e-01
## PC91 5.984314e-02
## PC92 3.416878e-02
## PC93 -6.015802e-03
## PC94 -2.106125e-01
## PC95 2.679723e-04
## PC96 -2.041438e-01
## PC97 -1.295453e-01
## PC98 -7.977836e-02
## PC99 -1.271480e-01
## PC101 -1.093273e-01
## PC102 -2.101682e-01
## PC103 1.053122e-01
## PC104 -1.506616e-01
## PC105 1.658028e-01
## PC106 2.843931e-01
## PC107 -4.107080e-02
## PC108 1.742891e-01
## PC109 -2.886080e-02
## PC110 -5.672525e-02
## PC111 -1.683311e-01
## PC112 -6.304220e-03
## PC113 9.302556e-02
## PC114 -1.413251e-01
## PC115 -4.034061e-01
## PC116 -4.770328e-02
## PC117 -1.455947e-02
## PC118 1.518460e-01
## PC119 -2.248756e-01
## PC120 6.098526e-02
## PC121 -1.001928e-01
## PC122 1.553259e-01
## PC123 -2.363524e-01
## PC124 1.354237e-01
## PC125 1.030857e-01
## PC126 1.181573e-01
## PC127 5.295171e-02
## PC128 -1.522229e-01
## PC129 -7.851309e-02
## PC130 8.734756e-02
## PC131 -2.966303e-01
## PC132 6.152004e-02
## PC133 4.252599e-02
## PC134 2.275716e-01
## PC135 1.918110e-01
## PC136 9.601589e-02
## PC137 -1.197720e-01
## PC138 1.383688e-01
## PC139 -2.054473e-01
## PC140 -4.580045e-02
## PC141 8.578545e-02
## PC142 -7.112606e-02
## PC143 7.781291e-02
## PC144 3.678276e-01
## PC145 6.121409e-02
## PC146 2.708227e-01
## PC147 3.133738e-02
## PC148 -6.746369e-02
## PC149 3.498798e-02
## PC150 6.728299e-02
## PC151 1.467252e-01
## PC152 -1.902990e-02
## PC153 1.387096e-01
## PC154 -1.781924e-01
## PC155 1.994517e-01
## PC156 2.442047e-01
## PC157 7.530545e-03
## PC158 -8.099376e-02
## PC159 4.502749e-01
## PC160 -1.459776e-02
## PC161 8.784971e-02
## PC162 -3.902729e-01
## PC163 3.040614e-01
## PC164 6.320154e-02
# Evaluate the tuned LASSO model on the held-out test set; test.model()
# (project helper defined earlier in the file) prints prediction summaries
# and MSE/RMSE on both the transformed and original scales.
# NOTE(review): `transformation = t` presumably refers to a transformation
# object assigned earlier in the file; if it is not, this silently passes
# base::t (matrix transpose) — confirm upstream.
if (isTRUE(algo.LASSO.caret)) {
  test.model(model.LASSO.caret, data.test,
             method = "glmnet", subopt = "LASSO",
             formula = formula, feature.names = feature.names,
             label.names = label.names,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104.8 122.9 126.6 125.8 129.4 137.9
## [1] "glmnet LASSO Test MSE: 92.7288734215879"
## [1] "glmnet LASSO Test RMSE: 9.62958324236246"
## [1] "glmnet LASSO Test MSE (Org Scale): 92.7288734215879"
## [1] "glmnet LASSO Test RMSE (Org Scale): 9.62958324236246"
# Train a Least Angle Regression (LARS) model via caret, when enabled.
# No tuning grid is supplied, so caret's default grid over `fraction` is used.
# NOTE(review): subopt is the *string* "NULL" here, while the matching
# test.model() call below passes the NULL object — confirm that
# train.caret.glmselect() treats both the same before unifying them.
if (isTRUE(algo.LARS.caret)) {
  set.seed(1)  # reproducible CV fold assignment
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "lars",
                                    subopt = "NULL",
                                    feature.names = feature.names)
  model.LARS.caret <- returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.707 on full training set
## Least Angle Regression
##
## 5584 samples
## 164 predictor
##
## Pre-processing: centered (164), scaled (164)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ...
## Resampling results across tuning parameters:
##
## fraction RMSE Rsquared MAE
## 0.00000000 10.740673 NaN 8.176476
## 0.01010101 10.636310 0.08054327 8.103499
## 0.02020202 10.545888 0.08054327 8.041118
## 0.03030303 10.469772 0.08054327 7.988135
## 0.04040404 10.410668 0.08318423 7.945482
## 0.05050505 10.360653 0.09343877 7.908528
## 0.06060606 10.312276 0.10413069 7.871622
## 0.07070707 10.265454 0.11383566 7.835991
## 0.08080808 10.221609 0.12178841 7.802879
## 0.09090909 10.180980 0.12882159 7.771726
## 0.10101010 10.142173 0.13491040 7.741814
## 0.11111111 10.106561 0.13989973 7.713700
## 0.12121212 10.074123 0.14432258 7.687718
## 0.13131313 10.044176 0.14851574 7.663812
## 0.14141414 10.015478 0.15265112 7.641146
## 0.15151515 9.987705 0.15658075 7.619140
## 0.16161616 9.962041 0.16005314 7.599077
## 0.17171717 9.938053 0.16317864 7.580037
## 0.18181818 9.915559 0.16608790 7.562125
## 0.19191919 9.894247 0.16887172 7.545199
## 0.20202020 9.874727 0.17133655 7.529376
## 0.21212121 9.856493 0.17363442 7.514536
## 0.22222222 9.839380 0.17576509 7.500448
## 0.23232323 9.822725 0.17788673 7.486630
## 0.24242424 9.806367 0.18004642 7.473486
## 0.25252525 9.790549 0.18210545 7.460574
## 0.26262626 9.775439 0.18404661 7.448257
## 0.27272727 9.761376 0.18582268 7.437020
## 0.28282828 9.748209 0.18748135 7.426515
## 0.29292929 9.736044 0.18897239 7.416933
## 0.30303030 9.724749 0.19034319 7.407788
## 0.31313131 9.714093 0.19165262 7.399139
## 0.32323232 9.704186 0.19285852 7.391046
## 0.33333333 9.694926 0.19397291 7.383263
## 0.34343434 9.686171 0.19500941 7.375801
## 0.35353535 9.678382 0.19588306 7.369035
## 0.36363636 9.671380 0.19664406 7.362983
## 0.37373737 9.664654 0.19737882 7.357257
## 0.38383838 9.658404 0.19805303 7.352236
## 0.39393939 9.652684 0.19865287 7.347895
## 0.40404040 9.647387 0.19919329 7.344019
## 0.41414141 9.642813 0.19961933 7.340770
## 0.42424242 9.638565 0.20001217 7.337818
## 0.43434343 9.634259 0.20043548 7.334793
## 0.44444444 9.630151 0.20083544 7.331842
## 0.45454545 9.626363 0.20118963 7.329147
## 0.46464646 9.622692 0.20154942 7.326431
## 0.47474747 9.619097 0.20191315 7.323738
## 0.48484848 9.615833 0.20223151 7.321398
## 0.49494949 9.612989 0.20249066 7.319358
## 0.50505051 9.610319 0.20273424 7.317397
## 0.51515152 9.607776 0.20296984 7.315509
## 0.52525253 9.605494 0.20317113 7.313886
## 0.53535354 9.603457 0.20333877 7.312564
## 0.54545455 9.601584 0.20348674 7.311304
## 0.55555556 9.599904 0.20361467 7.310203
## 0.56565657 9.598498 0.20370590 7.309232
## 0.57575758 9.597138 0.20380285 7.308222
## 0.58585859 9.595968 0.20387619 7.307365
## 0.59595960 9.594833 0.20395177 7.306563
## 0.60606061 9.593641 0.20404658 7.305728
## 0.61616162 9.592602 0.20412510 7.304935
## 0.62626263 9.591700 0.20419041 7.304204
## 0.63636364 9.590913 0.20424497 7.303479
## 0.64646465 9.590324 0.20427233 7.302841
## 0.65656566 9.589767 0.20430324 7.302208
## 0.66666667 9.589301 0.20432666 7.301686
## 0.67676768 9.589016 0.20432785 7.301260
## 0.68686869 9.588813 0.20432343 7.300916
## 0.69696970 9.588635 0.20432239 7.300541
## 0.70707071 9.588583 0.20430746 7.300334
## 0.71717172 9.588719 0.20426937 7.300323
## 0.72727273 9.588941 0.20422417 7.300401
## 0.73737374 9.589245 0.20417370 7.300574
## 0.74747475 9.589757 0.20409730 7.300824
## 0.75757576 9.590414 0.20400461 7.301171
## 0.76767677 9.591188 0.20390007 7.301593
## 0.77777778 9.592073 0.20378511 7.302090
## 0.78787879 9.593083 0.20365805 7.302622
## 0.79797980 9.594175 0.20352592 7.303201
## 0.80808081 9.595316 0.20339283 7.303800
## 0.81818182 9.596514 0.20325919 7.304489
## 0.82828283 9.597811 0.20311739 7.305257
## 0.83838384 9.599157 0.20297441 7.306105
## 0.84848485 9.600494 0.20284028 7.306941
## 0.85858586 9.601934 0.20269657 7.307833
## 0.86868687 9.603439 0.20255020 7.308798
## 0.87878788 9.605089 0.20238865 7.309945
## 0.88888889 9.606864 0.20221548 7.311279
## 0.89898990 9.608678 0.20204289 7.312740
## 0.90909091 9.610610 0.20185842 7.314229
## 0.91919192 9.612649 0.20166456 7.315823
## 0.92929293 9.614788 0.20146225 7.317547
## 0.93939394 9.617034 0.20125067 7.319382
## 0.94949495 9.619315 0.20104033 7.321251
## 0.95959596 9.621656 0.20082686 7.323109
## 0.96969697 9.624064 0.20061096 7.325030
## 0.97979798 9.626558 0.20038991 7.327087
## 0.98989899 9.629118 0.20016628 7.329227
## 1.00000000 9.631757 0.19993708 7.331435
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.7070707.
## fraction
## 71 0.7070707
## Warning: Removed 1 rows containing missing values (geom_point).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients"
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
## -1.424759281 -2.905573692 -1.321880264 -0.990975047 0.482156096 -0.264676646 -0.442113493 -0.050793492 -0.048547880
## PC11 PC12 PC13 PC14 PC16 PC17 PC18 PC19 PC20
## -1.292131595 -1.219916838 0.687993659 0.578233980 0.784754329 -0.421434288 -0.729519522 0.024438093 0.741002594
## PC21 PC22 PC23 PC24 PC25 PC26 PC27 PC28 PC29
## 0.065602954 0.060356027 0.155852581 -0.517885031 0.004413801 0.165073540 0.148242896 0.025712032 0.252636654
## PC31 PC32 PC33 PC34 PC37 PC39 PC40 PC46 PC47
## -0.044144357 -0.370009311 0.075088775 0.562426246 -0.123734915 -0.003322229 -0.084965145 0.100640564 -0.114445165
## PC50 PC57 PC59 PC60 PC61 PC62 PC63 PC64 PC66
## -0.044038810 -0.138447034 0.257711328 -0.061795131 0.066491263 -0.082098604 -0.069013829 -0.231286347 -0.093882228
## PC68 PC69 PC71 PC74 PC75 PC77 PC79 PC80 PC81
## 0.255872831 0.068123233 0.252562286 -0.056262840 -0.176146927 0.120872462 0.065728965 -0.039002844 0.179173974
## PC82 PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90
## 0.054773863 -0.225029993 0.181227827 0.340635496 -0.027685846 0.455764108 -0.172712684 -0.151924429 -0.151277663
## PC94 PC96 PC97 PC98 PC99 PC101 PC102 PC103 PC104
## -0.173400126 -0.157380498 -0.082297416 -0.020826812 -0.084731908 -0.041873861 -0.168316912 0.046027897 -0.100638375
## PC105 PC106 PC108 PC111 PC113 PC114 PC115 PC118 PC119
## 0.116658961 0.258231041 0.123870661 -0.123582279 0.035670689 -0.092657741 -0.387850355 0.098205841 -0.181671106
## PC121 PC122 PC123 PC124 PC125 PC126 PC128 PC129 PC130
## -0.034667175 0.108940290 -0.203912501 0.078664212 0.044342978 0.061066321 -0.103950454 -0.011784781 0.029231367
## PC131 PC132 PC134 PC135 PC136 PC137 PC138 PC139 PC141
## -0.260860418 0.001506585 0.188570163 0.134471399 0.027980488 -0.057560171 0.084411698 -0.153139584 0.022680935
## PC142 PC143 PC144 PC145 PC146 PC151 PC153 PC154 PC155
## -0.011368996 0.012841400 0.328561321 0.004342598 0.224944337 0.082998239 0.077911199 -0.110589656 0.148156983
## PC156 PC158 PC159 PC161 PC162 PC163
## 0.187922753 -0.018870079 0.403930451 0.035022196 -0.339648142 0.251923948
# Evaluate the tuned LARS model on the held-out test set; test.model()
# (project helper defined earlier in the file) prints prediction summaries
# and MSE/RMSE on both the transformed and original scales.
# NOTE(review): `transformation = t` presumably refers to a transformation
# object assigned earlier in the file; if it is not, this silently passes
# base::t (matrix transpose) — confirm upstream.
if (isTRUE(algo.LARS.caret)) {
  test.model(model.LARS.caret, data.test,
             method = "lars", subopt = NULL,
             formula = formula, feature.names = feature.names,
             label.names = label.names,
             draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 106.5 123.2 126.5 125.7 129.0 136.2
## [1] "lars Test MSE: 92.1350105563208"
## [1] "lars Test RMSE: 9.59869837823446"
## [1] "lars Test MSE (Org Scale): 92.1350105563208"
## [1] "lars Test RMSE (Org Scale): 9.59869837823446"
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 knitr_1.20 htmltools_0.3.6 reshape2_1.4.3
## [5] lars_1.2 doParallel_1.0.14 iterators_1.0.10 caret_6.0-81
## [9] leaps_3.0 ggforce_0.1.3 rlist_0.4.6.1 car_3.0-2
## [13] carData_3.0-2 bestNormalize_1.3.0 scales_1.0.0 onewaytests_2.0
## [17] caTools_1.17.1.1 mosaic_1.5.0 mosaicData_0.17.0 ggformula_0.9.1
## [21] ggstance_0.3.1 lattice_0.20-35 DT_0.5 ggiraph_0.6.0
## [25] investr_1.4.0 glmnet_2.0-16 foreach_1.4.4 Matrix_1.2-14
## [29] MASS_7.3-50 PerformanceAnalytics_1.5.2 xts_0.11-2 zoo_1.8-4
## [33] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.8 purrr_0.2.5
## [37] readr_1.3.1 tidyr_0.8.2 tibble_1.4.2 ggplot2_3.1.0
## [41] tidyverse_1.2.1 usdm_1.1-18 raster_2.8-4 sp_1.3-1
## [45] pacman_0.5.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.2.0 backports_1.1.3 plyr_1.8.4 lazyeval_0.2.1 splines_3.5.1 mycor_0.1.1
## [7] crosstalk_1.0.0 leaflet_2.0.2 digest_0.6.18 magrittr_1.5 mosaicCore_0.6.0 openxlsx_4.1.0
## [13] recipes_0.1.4 modelr_0.1.2 gower_0.1.2 colorspace_1.3-2 rvest_0.3.2 ggrepel_0.8.0
## [19] haven_2.0.0 crayon_1.3.4 jsonlite_1.5 bindr_0.1.1 survival_2.42-3 glue_1.3.0
## [25] registry_0.5 gtable_0.2.0 ppcor_1.1 ipred_0.9-8 abind_1.4-5 rngtools_1.3.1
## [31] bibtex_0.4.2 Rcpp_1.0.0 xtable_1.8-3 units_0.6-2 foreign_0.8-70 stats4_3.5.1
## [37] lava_1.6.4 prodlim_2018.04.18 htmlwidgets_1.3 httr_1.4.0 RColorBrewer_1.1-2 pkgconfig_2.0.2
## [43] farver_1.1.0 nnet_7.3-12 labeling_0.3 tidyselect_0.2.5 rlang_0.3.1 later_0.7.5
## [49] munsell_0.5.0 cellranger_1.1.0 tools_3.5.1 cli_1.0.1 generics_0.0.2 moments_0.14
## [55] sjlabelled_1.0.17 broom_0.5.1 evaluate_0.12 ggdendro_0.1-20 yaml_2.2.0 ModelMetrics_1.2.2
## [61] zip_2.0.1 nlme_3.1-137 doRNG_1.7.1 mime_0.6 xml2_1.2.0 compiler_3.5.1
## [67] rstudioapi_0.8 curl_3.2 tweenr_1.0.1 stringi_1.2.4 gdtools_0.1.7 pillar_1.3.1
## [73] data.table_1.11.8 bitops_1.0-6 insight_0.1.2 httpuv_1.4.5 R6_2.3.0 promises_1.0.1
## [79] gridExtra_2.3 rio_0.5.16 codetools_0.2-15 assertthat_0.2.0 pkgmaker_0.27 withr_2.1.2
## [85] nortest_1.0-4 mgcv_1.8-24 hms_0.4.2 quadprog_1.5-5 grid_3.5.1 rpart_4.1-13
## [91] timeDate_3043.102 class_7.3-14 rmarkdown_1.11 shiny_1.2.0 lubridate_1.7.4